Merge release-4-6 into master
author: Roland Schulz <roland@utk.edu>
Tue, 26 Feb 2013 19:06:06 +0000 (14:06 -0500)
committer: Roland Schulz <roland@utk.edu>
Tue, 26 Feb 2013 19:10:05 +0000 (14:10 -0500)
Conflicts:
share/top/gurgle.dat
src/contrib/addquote.c (deleted)
src/programs/mdrun/pme_loadbal.c

For all Python files, took the version with the new copyright:
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/make_nb_kernel_avx_128_fma_double.py
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/make_nb_kernel_avx_128_fma_single.py
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/make_nb_kernel_avx_256_double.py
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/make_nb_kernel_avx_256_single.py
src/gromacs/gmxlib/nonbonded/nb_kernel_c/make_nb_kernel_c.py
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/make_nb_kernel_sse2_double.py
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/make_nb_kernel_sse2_single.py
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/make_nb_kernel_sse4_1_double.py
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/make_nb_kernel_sse4_1_single.py

Change-Id: I8f9181d7d04d9da635ccb2a85f9d5c6255fcdb73

41 files changed:
1  2 
CMakeLists.txt
cmake/gmxManageMPI.cmake
share/top/gurgle.dat
src/gromacs/gmxlib/copyrite.c
src/gromacs/gmxlib/nonbonded/nb_free_energy.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/make_nb_kernel_avx_128_fma_double.py
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/make_nb_kernel_avx_128_fma_single.py
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/make_nb_kernel_avx_256_double.py
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/make_nb_kernel_avx_256_single.py
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/make_nb_kernel_c.py
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecGB_VdwLJ_GeomP1P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/make_nb_kernel_sse2_double.py
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/make_nb_kernel_sse2_single.py
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/make_nb_kernel_sse4_1_double.py
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/make_nb_kernel_sse4_1_single.py
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_sse4_1_single.c
src/gromacs/gmxlib/nonbonded/nonbonded.c
src/gromacs/gmxpreprocess/readir.c
src/gromacs/legacyheaders/gmx_simd_macros.h
src/gromacs/legacyheaders/thread_mpi/atomic/xlc_ppc.h
src/gromacs/legacyheaders/types/nbnxn_pairlist.h
src/gromacs/mdlib/forcerec.c
src/gromacs/mdlib/nbnxn_atomdata.c
src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu
src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref_outer.h
src/gromacs/mdlib/nbnxn_search.c
src/gromacs/mdlib/vsite.c
src/programs/mdrun/md.c
src/programs/mdrun/pme_loadbal.c
src/programs/mdrun/pme_loadbal.h
src/tools/gmx_dipoles.c
src/tools/gmx_order.c
src/tools/gmx_tune_pme.c

diff --cc CMakeLists.txt
index 43dffd9933c76488ea62482c4a81b86c7968ff0e,123c53363e06e2282cb07e14b7bb339eb1895647..f663aa6bd133d978b7eb53c7de093d3a8cff2066
@@@ -29,11 -63,11 +29,11 @@@ mark_as_advanced(CUSTOM_VERSION_STRING
  if (CUSTOM_VERSION_STRING)
      set(PROJECT_VERSION ${CUSTOM_VERSION_STRING})
  endif (CUSTOM_VERSION_STRING)
- set(SOVERSION 6)
+ set(SOVERSION 8)
  # It is a bit irritating, but this has to be set separately for now!
 -SET(CPACK_PACKAGE_VERSION_MAJOR "4")
 -SET(CPACK_PACKAGE_VERSION_MINOR "6")
 -SET(CPACK_PACKAGE_VERSION_PATCH "1")
 +SET(CPACK_PACKAGE_VERSION_MAJOR "5")
 +SET(CPACK_PACKAGE_VERSION_MINOR "0")
 +#SET(CPACK_PACKAGE_VERSION_PATCH "0")
  
  # The numerical gromacs version. It is 40600 for 4.6.0.
  # The #define GMX_VERSION in gmx_header_config_h is set to this value.
diff --cc cmake/gmxManageMPI.cmake
Simple merge
diff --cc share/top/gurgle.dat
index 3ce0c579b4c533cb7c6ec416bb643d21851c5c7e,4de9becae2e947d3931b136f84e8b2bb230d95d5..69661e9fabcdfd119fc57e73c035bb5e5c6ddfb2
- 366
- ��ߦ��ߨ���߬��������߻���ߦ��߷���ߋ�߻�߶�ߦ��������׷���������߶��
- �߳���ߋ��߳���߫���ߨ���߫���߻���׫������
- �����߽����߲�߷������ײ��������
- �������߾������߲����߽�����ߦ����ߺ�������׾�������߻�����������
- ����߶��ջ����߸�ߨ���ߨ���ߋ��߼�������׽������߽����
- ��߼��߻����߳���߶���߯���׭��߷��߼����߯�������
- ��،߬�߳�����ߨ���ߦ��߻��؋ߺ���ߴ���ߦ��������׭��߷��߼����߯�������
- ��ߋ��߻��߰�ߋ��߸������׭��߷��߼����߯�������
- ���؋߯���߲��߼����߶ؒ߼����ߋ�ߋ��ߺ����׫������
- ���؋߯���߲��߼����߶ؒ߼����ߋ�ߋ��ߺ����׸����������߹�����
- ���߬������߻�߽��߬������߻�߽��߬������߻�߽���׭�߬����������
- ���،߱��߽���߬��،ߵ���߸����������߲����׼������߽���������
- �����߸����ߖ�߱��߬�߸����׭��߷��߼����߯�������
- ��߳���߬����ߵ����߭������߫����،߬��������ߦ��؉�߹���������ޠײ����߯������
- ����߯�������ߖ�߾��������ײ��߶�߽�����
- ���؋߸�������߸���ߞߨ�������ޠײ����߯������
- ����߻�������߲�߽����ޠ׫��߾����
- �߼��������߲�߽����������ׯѵ�߷������
- ���߬�����߬����߳���߲���߶�،߲���ߺ�����߰�ߦ���߼������������׽������߽����
- ���؍�߶�������������׫������
- �����ߦ���߰������ׯ�߾��������
- ����ߴ���߰�߸���ߞ��ߦ���߾�������׹�ߥ�����
- �߷��߬�߲���߯�������ߞ��߫���߶߸��߲�ߞߨ�������׹�߽�����
- �߼�����߶�߶�ߋ��߹����ׯѵ�߷������
- ��ߦ��߫����߲��ߦ��ؓ�߸��߬�������׽������߽����
- ����߯���߬�����߳���߽����׳����
- ���߹��߾���߬����߲����߹��߸���߬�������߼��
- �������ߦ������ߦ����߫���،ߞ߸���߶����߶߳���߫���Ѡׯ���߹�������
- �߲��߱����ߞ߲����ױ�ߦ�����
- ��߰��߼����߹������ߋ��ߺ��߫���߼���߬�߹����׬������
- ����߼����߫���ߖ�ߞ߭�������ׯ���߹�������
- ����ߞ߳�����߯��������ױ���߼����
- ��ߞ߻���߻���ߨ����ױ���߼����
- �ؒ߰���߹�����ߨ���߶߸��߶�߭�����׬�����������
- �������߷���߾�����߹���������߲��׬������
- �����،߱�߭���߹��ߋ��ߨ����׵��߻��������
- ���ߨ���ߞ��߱�߯���߲����ߵ���ߞ߻���߽���׫��߬�������
- ����ߨ���߬�߮����߾����߶��ׯ������
- �����߸��ߞ߼�����ߋ�ߴ���߾���׫��߾����
- �����ߞߨ����߰�ߞߵ����׹�߽�����
- ��،߫���ߋ�߲���߰��׹�߽�����
- ��ؓ�߼���ߦ���߾�����߫���ޠ׹�ߥ�����
- ���߰�߭�������߬�����ת��֠ש��
- ���ߋ��߹����߽�ߨ���ߦ���׬���ߨ����
- ���߫��ߋ�߭��ߋ��ߪ��������׫������
- ����߲��߼���߾�ߞ߬�����׹�߽�����
- �ߨ������߬�����߶߸��ߪ���Ѡ׵�߳������
- �߾�߫������ߦ���߸���߲������׭��߷��߼����߯�������
- ������߶�߫��߲��������׼������߷����
- ���߼����߲���߲���߲����߾�ߞ߽�������׹�ߥ�����
- �ؓ�߲�����ߦ���߳��������ߞ��߶�ߋ��߲�������߶ؓ�߼�����߲�߰���׫������
- ���߬��������߲���߽�߹��߫���ߦ����׭��߷��߼����߯�������
- �ؒߞߨ�������ߞ��߶߽ؒ��������ׯ������
- ���߫����߫���ߦ��؍�߽�����ߨ���ߦ��߹���ߋ��ߩ����ת����߻����߬�����
- ���߶�߸���ߞ߳�����߬��������߳���߫����׫��߫����
- �������ߦ��ߖ�߳���ߴ������߸������׫�������߲�����
- ���߳���߽�����ߪ�����߻����׫�������߲�����
- ��������߰�߯���߬������׭��߷��߼����߯�������
- �߼����߫���ߦ��߷���ߞ��߾����ߦ���ײ��������
- ����߰���߷����߽����ת����߻����߬�����
- ���߳����߲�߻���ׯѵ�߷������
- �����ߦ��߳���ߋ�߽�ߋ��߲������߫��������׼������߽���������
- ����߲�߾�ߋ��߼�����߬����׭��߷��߼����߯�������
- ���߬���߬��߼��؋߸�߷���ߨ������ߞ߼���������׺�߼��������
- ����ߦ���߬����ߞ��߬����߰��߯������׹�ߥ�����
- ����߶�߱���߰�ߦ���߻�����߼���߫�����׺�߼��������
- ����ߞ߲��ߞ߹����׾�������߻�����������
- ���ߨ�����߰�ߋ��߽��߸�߭����ߞ��߭�����׵�߭�������
- ������߼����ߞ߭�����׽�����
- �ߨ���ߋ�ߴ���߭����߱���ײ��������
- ����،ߦ���߻���������߰�߻������׸�߲�������
- ����،ߋ��ߨ��߶�߲����ߺ���׸�߲�������
- ��������ߋ��߳���߽�������ߋ��߳���׵����߯������
- ����߽������ߋ��߬��ߨ����ߞ߯����߶�ߋ��߬���׹�ߥ�����
- ��������߷�����߰����ׯѵ�߷������
- ���߼����ߺ����ײ����߯������
- �߬����߳���ߨ������ת����߻����߬�����
- ����߰�߳���߼�����������ת����߻����߬�����
- ��߼����߫��߲���߶�߶�߼����ߞ߳���ײ��������
- �����߬����ߖ�߬������ײ����߯������
- ���������߳��߻���߰�ߋ��߹����ߞ��ߴ���߼����״���
- ����ߖ�߳���߲���߻����߸��߼�����ߞ��߸��߬���߰���ת����߻����߬�����
- ���؋߹�����߲�߷����׫�������߲�����
- ���߽�����߲���߻���ױ���߼����
- �߻��؋ߨ���ߋ�߼���߻����׫�������߲�����
- ��؍�߸����߷��ߦ��߷������׬������
- �����߽������߰�ߨ�����߻���߲�߫������׫�������߲�����
- ��،߽������߰�ߋ��߲�����߬������ׯ���߹�������
- �߻��؋ߨ���ߋ�߼����߾������߱��߻�������Ѡײ����߯������
- ����߻����؋߬����߶�،ߞ߽����߷����ޠ״Ѿ�߹��������
- ���߲�߻�߫����ת����߻����߬�����
- �߼��؋߬����߶��׻�������ߵ��
- ����߾����߳��߲�߻�߫����ת����߻����߬�����
- �������߫���ߦ��؍�߷������߯������׫���
- ��������ׯ���߹�������
- ����߰��ת����߻����߬�����
- ��������ߨ���߹������߼�������׫���
- ���ߪ�������ߖ�߬��������߶�߷����׵ѸѺѲ�߹�������
- ���؍�߾����ߋ�߷���߬��������׵����ߵ����
- �߬�����߽�ߋ��߯���߰�ߋ��ߦ����ת����߻����߬�����
- �����߼��؋߬���ߦ���߫�����߶�،߱���ߋ�߫����߷�߫�����׽����߼������
- ��߷����ߖ�ߵ���ߞ߲�����߶�ߞ߼������׹�߽�����
- ����߾�����߽�����ׯ���߹�������
- ���ߨ����ߖ�ߞ߹�������߯�����ײ��������
- ���������߳���ߖ�߰�������׽����߼������
- ����ߦ���߲����������ߞ��߯�����������ߞ��߭��߶�ߪ�ߦ���߬�����׹�ߥ�����
- �����ߋ��߽������߶�ߦ���߷����׫��߽��������
- ���߰��ߋ��߬���׫�������߲�����
- �߱���ߞ߳�����߯������׫�������߲�����
- ����߲�����߶���ߋ��߽���߽���ׯѵ�߷������
- �߳���߬�����߷��߳����׼�߲���������
- ����������߲����߲�����߹������׫��߽��������
- ��������������ׯ���߹�������
- �߻��؋߳���߻����׫��߽��������
- �����߰��ߋ��߸����ׯ���߹�������
- ���߼����߽�ߞ߬������׫��߽��������
- ��ߦ��؍�߬�߬������ߨ��ߞ���؋ߦ��߻�����׫��߽��������
- ���߯���߰�ߋ��߭��������߲��ߖ�߽����߰�߾��߬����ߨ���ߋ��߶���������߰�ߋ��߬������ߞ��ߋ��߫������߰�ߺ���߲��Ѡׯ���߹�������
- �������ߖ�߷�ߨ��߶�ߋ��߱���߰�߼������ߞ��߸���ߨ���߬��������ߋ��ߨ���߫������ߋ��ߩ�����߰�߻��������߹��߷�ߖ�߫����߷��߽������،ߴ�����ߞ��ߋ��߹�����߰�߳���߼�������Ѡׯ���߹�������
- ���߶ߨ���߬�����߻���ߪ���߫���ߨ���߸����ߩ��������ߞ��ߨ���߹������߾����߫����ߨ��߾������ߋ�߯�����ߞ��߻������߲�߽�������Ѡׯ���߹�������
- ���ߦ��ߨ���ߴ���߫���߲�߱���ߖ�ߋ��߳���ߨ���߶߳��߲�ߩ��������ߪ���߫���Ѡׯ���߹�������
- ����߰�ߋ��߽��������ߪ���������
- ���߻��؋ߨ����ߴ����ׯ���߹�������
- ���߻����߬������ߦ��؍�߰�߾�����׫��߽��������
- ���،ߞ߸���߬�����߽�����ײ����߯������
- �ؒ߸����߸��߲�������߰�ߦ���߾���ׯ���߹�������
- �����߳�����߹�������ׯ���߹�������
- ��،߱��ߦ���߹�����ׯ���߹�������
- ���ߨ���߽�߬��������߾�ߨ���߭������߶�ߦ���߶������׾�������߻�����������
- ���߼��������߸���߽���߽����׫��߽��������
- ���������ߨ����ߋ�߽�߱����ߞ��߹������׫������
- ������ߨ���߼������ׯ���߹�������
- ����߷�������ׯ���߹�������
- ���߹���ߦ���߬����߬�߬�����׹�߾�����
- ����߶�߭���߸����׬����ؑ�߯����
- �����ߦ���߶�����׻�ߩ��߻��߬�����
- ��������߬�����߸����߭����׭��߷��߼����߯�������
- ��ߨ��߲�߯��������ׯ���߹�������
- ��߻��؋߽�����߾������׳����
- �ߨ������ߞ߱��������߭����߲�߷����׹�ߥ�����
- ����ߋ��߻��ߞ��ߦ��ߨ���߻���ײ��������
- ��߾��߸��ߋ��߹���ߨ�߾��߸��߾����׳����
- ���߭�����߾�ߞ߫����׽�����،߬���߬�����
- ��߷���߱�߲�����׺�߼��������
- �����߲�߾����ײ������
- �߬����߯��������ׯ���߹�������
- �߯������ߖ�ߞ߬��߰�߼�����������׾ѯ�߷������
- ��߻����؋߷���ߋ�߽�߫��߫���ׯ���߹�������
- ���������،߸���ߺ�����߹��߬���߼������׳����
- ��،߾������ߋ��߭�����ׯ���߹�������
- �ؒ߾�߰������ׯ���߹�������
- �߭�����ߋ��߼���߭����߰��߰�ߋ��߯�����׼����߽���������
- �߬����߬����߹���ߞ߸��߱����ߺ����������ׯ������
- ����ߞ߳���߹�����߬��������׹�ߥ�����
- �����߽������ߋ��ߺ����׹�ߥ�����
- �������ߖ�߱��߾��������߶�߭������߬�����ׯѵ�߲����������
- ���߶�߭����߾������ߦ���ߺ����׹�ߥ�����
- �����ߦ��������צ���
- �߾�ߞߨ��������߫�����״��߼������
- ���ߋ�߸�߻����׽�����ߞ��߽��������
- ���߲����������ߖ�ߨ�����߭�����ׯѵ�ߩ��߲������
- �����������߷���߶߼����ׯѵ�ߩ��߲������
- ����������׬���߫����
- ��ߦ��߻��؋߳���߼���߮�����߼����ߦ���߸����߹����צ���߬��������
- ����߶�߬�����ߋ�߬����߶�ؓ�߱����߬����ײ��������
- �ؒߞߵ����׹�߽�����
- ��ߨ�����؋߷���ߋ�ߨ���߰���߶�ߞߨ�����׽�����ߞ��߽��������
- �������ߋ�ߋ��߯����߾�����ߪ���������
- ��ߦ��߬��߲�߸������߷����ߴ����߲�߻����׭��߷��߼����߯�������
- ���߯�����߽�����׹�ߥ�����
- ���߯�����߼����߶��׹�ߥ�����
- �߸��ߞ߹����߻�����߽����׹�ߥ�����
- ��߳���߯�����߬����߶�߫����߼�������׹�ߥ�����
- ���߸��ߋ�߭�����ߋ�߶��׾Ѻ�߫�����
- ����ߨ��߯�����߼����׽������
- ����ߨ��߭�����߼����׽��������
- ����߰�ߋ�ߦ���ߺ���׹�߽�����
- ����ߪ�ߋ��ߩ�����߾����ߨ���ߋ��߫�����׵����ߵ����
- �������߹��ߺ����ׯѵ�ߩ��߲������
- ����ߞ߱���߻���׭�߲��������
- ��������߷�������ߋ�߯�����׹�߽�����
- ����ߋ��߹����߽������ߪ���������
- ����ߨ��߼����׽�����ߞ��߽��������
- �����߸������߸���߭�����߭����ׯѵ�߲����������
- ���߷���߹��������߼�����߹���߽������׼�������߼����
- ��ߖ�߳���������׾ѭ�ߩ��߽������
- ���߼������߶���ߋ��߬������׬������߸��������
- �߾�ߞ߯���߳�������߼������׳����߳����
- ��������߼�������߼�����׬�����߲������߹���߷����
- ����߫�����߻���߶��׽�����ߞ��߽��������
- ����ߪ�ߦ���߭�����߽����������߼��
- ���߻�ߦ��߳���ߦ���ߩ�������߬�߹����׬��������߼�����߼�������
- ��،߬�߹���߶�،߬����׹�߽�����
- ���߽��ߺ��߽��������׵ѹ�ߴ�������
- ����߻���������ߨ����߰�ߦ���߻�����߫�����׬����߮����߶���
- ����߼������߱����ߦ���תѬ�߾����
- ���؋ߺ��߫���ߦ�����߬����׹�ߥ�����
- �߻�߶�߾��ߋ��߫����ײ��������
- ����߸���߲�ߞ߽����׹�߽�����
- �������߼�����������Ѡ׸��ߺ�����
- ���߶�߻���ߞ��߼��߶�ߨ����׫��ߨ����������
- ����߰�ߋ��߽���ߨ���ߞ߽�������߽���׫��߭�������
- ��߷���߸���߯��߯��߯��߯��߯���׹�߽�����
- �������߶�ߋ��߬������߶�ߦ��؉�߸��ߋ��߸����ת����߻����߬�����
- �߷���ߞ߽��߼���߰�߯�����߻��������ת����߻����߬�����
- ��،߽������߭�����߲���ޠײ����߯������
- �؉�߸��߫��߫���������ߞ��ߞ߲����������׽�߷������
- �߾�ߋ��߯���������������߯������߻�������ߦ���߯�������Ѡ׸��ߺ�����
- �����߰��ߨ����ߋ��߷������߸��׹�ߥ�����
- �ߨ��߽���ߋ�߷���߾���������׹�ߥ�����
- ��߫���ߞ߭���߯�������׹�ߥ�����
- ����؍�߭��߷���׭��߷��߼����߯�������
- ����߽����߸��ߞ߳�����߲�������ׯ������
- ��߲�߸����߶�،ߋ��߹����߬����׽������߽����
- ��������ߋ��߽���߾���ߨ���ߋ��߬���׬�߷�������
- �������׵ѳ�߯������
- ���������ߖ�߬�������߫�����߻����װ���������
- ���߲��ߦ��ߴ����߶ؒ߭�����߰��װ���������
- �ؒ߱��߸����߻��߷����ޠ׬������
- �؛߳���߲�����߲�������߽�����߶�߫���߬������߳�����׸��������
- ����،߾������ߪ�����߮�����׬�߽����
- ����߯�������߼�����؋߻���߲�߾����״Ѿ�߹��������
- ���،߸�߷���߰��߶�ߞ߲����׳����
- �����ߞ��߶�����߫���ߞ��߱��߳����ײ��������
- ���߾�߫���߬���߶��߫���߬���߾��؋߾�߽��߾�ߦ��߫����߶�߶�Ѡ׵�����߽�����
- ��߾��߲��߽�߻����߽��߶߾��؋߱�߻������Ѡ׵�����߽�����
- �����߱��߰���߬�����߷�߾���߹���������߲����߽������Ѡײ����߽��������
- ������߬�����ߨ���ߋ��߹������׺��߽��������߬����ߩ��߶�߬��������
- ������߰��߰�ߺ���߬��������������߾�����������߶����������קв�����
- ���ߖ�ߞ߻��׹���������
- �����������߬�������߾�������׬���߫����
- ��߶ߨ���ߦ��߶ߨ����߸���߲�ߞ߽�����׹�߽�����
- ���߱����߼���ߋ�߽��߾�������߹��߷��߯����׳����
- ���߼������߸��߻������߸��ߨ������׹�߽�����
- ����߽���߽���߽����߶ߨ���ߦ��ߖ�߲�߭����ש����߽����
- �����߱��߲�ߵ��ߖ�ߺ�����߫����߻���������׽���������
- ����ߞ߲������ߞ���؋ߦ���������ט��������ߦ���Ѡ׽���������
- ��߶ߨ�����ߦ��ߋ�ߪ���������߫����߶ߨ����ߺ������ߖ�߽������׵�߼�������
- ��Ґ��׫����ߨ�����
- �������ߜ����������Ѡ׶������Я��������
- ��Ґ��ߨ�؍�߶�߫�������׬�������
- ���؋ߦ��߲���߫���߫����߸�߹�������׽����߼������
- ���߻���߶��̻�׸�����߼�������
- ��Ґ������߭����߾�����׳�����߾��������
- ׫���ߒ��������ߚ������ߏ�����ߋ��ߞ�����֠׫���
- �߲�߸���߫���ߴ�����ߴ�����ޠ׬����߯����
- �����ߞ��߽���ߒ������׬����߯����
- ���،ߪ����߾��߳��،ߪ������׭��߷��߼����߯�������
- �؛߽�ߨ����߶�߶߼�����׭��߷��߼����߯�������
- �����߲��߽�ߋ��߹����߹��������߽��߶�،߲���ߖ�ߞ߷��������߽��������׭��߷��߼����߯�������
- ����������߲���߸��׭��߷��߼����߯�������
- �����،߱������ߨ�߼��؋߹����؜��ߨ�߼��߻�߶�ߖ�ߋ��߲���׶������
- ��،߼�����߭����߹��ߪ��ޠ׬����߯����
- �������ߋ��߯����ߐ�ߞߵ���߮߼�������ת����߻����߬�����
- �����҈����ߨ�҈���ߨ����ר���߬�����
- ����ߖ�߫�����ޠ׬���ߨ���ߺ������߶߫��߯������߲������
- ���ߋ�ߋ��߼����ߐ�ߺ������ߞ��ߪ������ײ�������߾�����߬�������ߨ������߬�����������
- ����ߗ��ߑ���߼��������ߨ�����߼����ߋ���߭����������׳����ߖ�߲������ߙ��ߒ�������߾�����߬�������ߨ������߬�����������
- ��ߋ��ߺ��߬������߼����߻���ߋ�߯�������ׯ�߉ћ�߽����
- �ؒ߳������ߙ��ߞ߱��߬����������׬����߫�����߯������
- �߮���߲�ߵ��߽������߳������׽����
- ����ߙ��߳�����������׭�����ߨ��������
- ���ߦ��߱���ߖ�߸�����׾����߼������
- ���߼��߽�߫��ߺ�����ߦ��߼��߽�߫��߳���ߞ��ߦ��߼��߽�߰�߫����׵�߼�������
- �����׽�߷����
- ���߻��զ���ߪ��߼������������׷ѵѼ�߽���������
- ���ߨ����؋ߦ��ߞ�߲�߹��������׸�߸��������
- ���߼��߾�����߸�߰�߭����߳����װ���������
- ��߾�����ߦ���߳����߶�߶����������׫�����
- ����߱�ߒ����Ѡ׋����
- ������Ѭ������������׶���ߖ�����
- �߱���߳����߱��߸�����׶���߯����ߴ���߯�������
- ��،߱��߻���ߦ���߽��߶�،߸������߫�����׽��߻�����
- �ߪ���߫�߼����߽��߫�����߷���߼�������׽��߻�����
- �������ߖ�ߋ��߽�����ߴ�����߬�������ߐ�߆���߰����߭�����׬���߬�����
- ����߾��߫���߲����߭�����߷���߫�߸�߫�߼��������׭����
- �����ߊ�ߊ��߱���،߷����ޠױ����
- ����ߴ���ߋ��߹������߽�����׫��߬�������߹���߻���ߋ��߻�����
- ��ߋ��߲��������߫���߼���ߐ�ߦ�������ߞ����ߺ���������׵�߬��������
- �߹���ߞ߸����߻����������ߖ�ߋ��߹�����׫��ߺ������߬������߽����
- ��ߦ��߷���ߞ߲���ߐ�ߦ���߰����׸�������
- �ؓ�߲����ߦ���߻���׭��߷��߼����߯�������
- ���߶ߺ���ߨ�����ߨ��ߦ���߳����׭��߷��߼����߯�������
- ����ߞ߲�����ߨ����߶߭�������߲������׭��߷��߼����߯�������
- �����،߬����߫���ߋ�߼�����ߋ��߭���ߦ��؍�߰��׳��ߥ��������
- �����߶�߾���߰���߫���߶�،߰����׳����ߴ�������
- ��ߵ���߫�����߽������׽�����ߴ����
- ر���ߨ�ߞ��ߝ��ߒ����߭���ޠ׫��������߻�
- ��������ߍ�������ߋ�ߜ�����ߞ�ߚ����ߞ�������׸��ߋ���
- �������ߑ��߬�������׵�߽����
- ���߫����߸���߸�������׺������
- ��߫���ߋ��߭����߭���ߙ��ߞ�߾���������ײ����߯������
- �߈��ߛ��������߶߈��ߍ����������׫��߬������
- ���߼����������ߨ��ߵ���߭�����׽������߽����
- �����߾���߸����߷����׹����߽�����
- ���ҋ��ҋ��ߴ�ߝ���ߝ����׫��߬�������߯��������
- �����ߖ�߈�����ߐ�ߋ��߆�����׫��߬�������߯��������
- �������Ҳ������Ҳ������Ҳ���״����ߴ�����
- �����������߬����ߞ��߽�������Ѡׯ�ߵ�߷������
- ����������߷�߳�����߷�߲����ߪ�߶�߻������װ���������
- �߯�����ߩ������߽������߲����ߞ߯�����߹����׻����߬���������
- ����ߛ��؋ߗ���ߞ��ߝ������ߖ�߶�����ߌ�ߋ���ߗ���ߋ�ߌ�������ߋ����׫��߫�����
- ��،߼������߲�ߋ�߽����ߒ�߽�����߾������Ѡש��ߛ��߸�����
- �ߝ������ߖ�ߒ�������ߜ����߶ؒߐ���׫��߭�������
- �����߸����߷��ޠ׫��߭�������
- �����ߋ���ߖ�ߋ��ߝ���ߑ���׫��߭�������
- ����ߒ�߆���ߌ��������ߞ��߶߈���ߋ����ߖ��׭��߷��߼����߯�������
- ����߲����߬����߆���߬����׳�����
- �ߝ������ߖ�ߒ�������ߜ����߶ؒߐ���׫��߭�������
- �����߸����߷��ޠ׫��߭�������
- �����ߋ���ߖ�ߋ��ߝ���ߑ���׫��߭�������
- ����߲����߬����߆���߬����׳�����
- ����ߞ����ߓ���ߞ߬���������ߓ��ߛ����ߘ��ߔ������׭��߷��߼����߯�������
- �����������߭ѶѯѠ׭��߷��߼����߯�������
- ���؋ߦ��ߨ���ߦ��߱����߲��߷���߻����߽���߸����׼������߽���������
- ������߱����߳�����߱�ұ������ߙ���߱�߷�������׸����߽��������
- �؛ߝ�߬���ߞ��ߨ���ߖ�߶߈��ߖ�߳ѾѠ׫��߲����ߞ��ߋ��߯�����
- ��،ߪ�����������߫���߼�������߲����ߦ��߹���ײ����
- ��߽�������ߞ��߯�������ׯ���������߲�߬������ߞ��߱�������ױ������֠׸����߽��������
- ���߲�߶�����߬���߳��،߫���߬����������߫������߰�ߦ��ߦ��߾���������ߋ��߯�������׸����߽��������
- ��ߦ��߷���߬��߲������ߐ�߬�������������ߐ�߾��������������ߖ�ߦ���߹������׸����߽��������
- �����ߞ߳��������ߖ�߆���߷����׸����߽��������
- �����ߦ��؍�ߨ�������ׯ����߻���������ߖ�߯�����߯�����߯����
- ����߬�������߫���������ߞ��߾�������׸����߽��������
- ����߼������߭�����ߦ���ߦ���߼������߹��ߦ���ߞ��ߵ���߳���߾��߰����߼������ߖ�ߨ���߽����ߦ���׸����߽��������
- ����߫���߱���،ߞ߻���߸���ߨ��������׫��߽�������
- ����߯����߫����߹����߬�߻����������߹���߰����׸����߽��������
- ���߹������ߐ�߯����߈��߶������������߲�����׹����߷�������
- �߈��ߚ������ߋ�ߓ����ߑ��ߋ�ߍ����ׯ��������߾�߬��������������
- �ߒ������ߋ�ߘ��ߋ��ߗ����ߐ�߈���ߛ���ߝ�����߈����׺�߳�������
- ��ߝ���ߋ�ߋ��ߍ���ߙ���ߊ����߈����߆��ߜ����׹����߾�����
- ��،ߕ���ߋ��߈��ߋ���ߌ����ߖ�ߛ����׽����ߋ�߬�����
- ���߹���߲�ߨ���߶�������׫��߳���߽�������
- �ߊ���ߋ�ߝ�ߝ����ߞ��ߌ������ߝ��ߑ��߶ߛ���ߖ�ߝ�����ײ���߳��
- ����߈���ߛ��߮����������ߔ�����ߊ�ߛ��ߛ��߽�����ߞ���������߬��ߖ���߻����ߛ��ߝ�����߈�ߌ��ߖ���ר��ߌ���߷������
- ����ߛ��߬�������ߌ�������ߛ��߳�����ר��ߌ���߷������
- ����߾�����߈���ߛ��߳����߰����ר��߬���߷������
- ���ߓ���߯���������߈�����׸����߽��������
- ������ߒ����ߐ��ߐ�ߑ�������ߓ���ߖ�ߍ������ߜ����׸����߽��������
- �������ߗ��ߚ�������ׯ�����߼�������
- �����،ߐ���ߒ����ߋ�ߒ���ߑ��ߍ���������׾�����߲�������
- ���ߌ������ߏ�����ߋ���߶�����ߋ���ߗ�،ߑ��ߋ��ߐ���ߐ��ߙ������ߙ���ߋ��ߌ���ת����߻����߬�����
- ���߈��ߌ���������ߒ��ߘ����߅��ߔ������ױ���߷�����
- ����ߖ�߆��؍�߈����ߞ����ߋ��ߘ����ߵ�ߵ�ߞ�ߋ��ߝ�����ߐ�ߋ��ߌ���׭������߻�������
- ����ߐ�ߝ����߳��،ߏ���ߖ�ߗ����ׯѵ�߷������
- ����ߞ�ߋ�����ߒ�߈���Ҍ�����ߞ����ׯѵ�߷������
- ��ߖ�ߋ��ߖ��������ߜ��������ߌ�����׹����߷�������
- ����ߚߐ��������ߚߋ���ߏ����������ׯ�����
- �������ߐ�ߞߍ������߈�������ߒ��������ߜ�������׾߼����
- ������ߋ��ߏ���ߐ�ߋ��ߜ����ߖ�ߌ����ߖ�،ߜ�����������ߖ�ߑ���ׯ����߷�������
- ����������،ߙ�����ߙ���ߏ���������ש��ߛ��߸����߸���������
- ���ߋ���ߙ��ߋ�����ߖ�ߐ����׵�߷�����
- ����،ߋ��ߏ�����߆��߈���،ߋ��ߌ������׭��߷��߼����߯�������
- ��߫����߶�߱�߸�����߶�߫��߷�����ߦ��ߴ���߶�،߰����߼��߱��߽�߫�������׸����߽��������
- �������������ߖ�ߞ��ߋ���ߘ������׹����ߥ�����
- �߳���ߦ���߶ߨ���ߴ���ߦ��߳����׫����ߖ�߹��������
- ������߶ߌ���ߑ�����ߜ������߈���ߚ����������ײ�����߽�����
- ����ߖ�ߞߗ����������ߗ��ߒ�����ߖ�ߞߘ����׫��߭�������
- ���ߖ�،ߙ�����߈���ߗ�������ߞ��ߗ�������ߐ�ߜ���������ײ�������
- ��ߖ�߈����؋ߙ��ߝ��ߓ����߈�؛ߗ���ߑ�ߓ���ߞ�ߞ���׫��ߪ��������
- �����،ߑ�߈��߆��ߜ��ߍ���ߐ�ߞ�ߚ����������׸�����߸��������
- �ߓ���ߋ�߈����ߋ���߶ߙ���ߓ���߶ߛ�ߌ���������׼���߼�������
- ���߶ߗ���ߚ���������ߓ�����ߋ���ߚ���������ߚ����׻���߯������
- �����ߐ�ߜ�����������ߞ��ߋ��ߌ������߲�ߌ������ߖ�߈�������ߑ�����ߜ��������ײ�������
- ��،ߊ����ߜ���ߋ���ߐ���߆��ߞ��߶ߔ����״���߽����
- ��������ߘ����ߙ������ߓ����ߐ�ߓ�����ߐ����׽����ߺ���߯����
- �؉�ߝ��������ߝ�����ߞ߉���������ߌ����ߋ��ߐ���ߒ���߶ؒߚ�����ߖ�ߙ���ߞ������߶؉�ߔ�����ߒ������ײ���ߥ����������
 -360
++361
+ If You Want Something Done You Have to Do It Yourself_(Highlander II)
+ I Live the Life They Wish They Did_(Tricky)
+ Jesus Built My Hotrod_(Ministry)
+ Nurture Another Mind, Before Yours Expires_(Arrested Development)
+ Hmm, It *Does* Go Well With the Chicken_(Beastie Boys)
+ We Can Dance Like Iggy Pop_(Red Hot Chili Peppers)
+ It's So Lonely When You Don't Even Know Yourself_(Red Hot Chili Peppers)
+ Do the Dog On the Ground_(Red Hot Chili Peppers)
+ Don't Push Me, Cause I'm Close to the Edge_(Tricky)
+ Don't Push Me, Cause I'm Close to the Edge_(Grandmaster Flash)
+ Bum Stikkie Di Bum Stikkie Di Bum Stikkie Di Bum_(R. Slijngaard)
+ She's Not Bad, She's Just Genetically Mean_(Captain Beefheart)
+ Being Great is Not So Good_(Red Hot Chili Peppers)
+ If Life Seems Jolly Rotten, There's Something You've Forgotten !_(Monty Python)
+ Your Proposal is Accepted_(Men In Black)
+ Don't Grumble, Give a Whistle !_(Monty Python)
+ Stop Drinking My Beer !_(The Amps)
+ I Calculate My Birthright_(P.J. Harvey)
+ You Should Sleep Late Man, It's Much Easier On Your Constitution_(Beastie Boys)
+ You're Insignificant_(Tricky)
+ Check Your Output_(P. Ahlstrom)
+ What Kind Of Guru are You, Anyway ?_(F. Zappa)
+ I Had So Many Problem, and Then I Got Me a Walkman_(F. Black)
+ I Caught It In the Face_(P.J. Harvey)
+ If You Touch Me, You'll Get Shocked_(Beastie Boys)
+ This Puke Stinks Like Beer_(LIVE)
+ Art For Arts Sake, Money For Gods Sake_(10 CC)
+ A Man Needs a Maid_(N. Young)
+ No One Could Foresee the End That Came So Fast_(Slayer)
+ Stay Cool, This is a Robbery_(Pulp Fiction)
+ With a Little Penknife_(Nick Cave)
+ In a Deep Deep Well_(Nick Cave)
+ I'm Only Faking When I Get It Right_(Soundgarden)
+ Sisters Have Always Fascinated Me_(Speech)
+ There's No Room For the Weak_(Joy Division)
+ All Work and No Play Makes Jack a Dull Boy_(The Shining)
+ They Were So Quiet About It_(Pixies)
+ Never Get a Chance to Kick Ass_(The Amps)
+ Yeah, a Wuzz, Or a Jerk_(F. Black)
+ It's Time to Move On_(F. Black)
+ It'll Cure Your Asthma Too !_(F. Zappa)
+ Out Of Register Space (Ugh)_(Vi)
+ May the Force Be With You_(Star Wars)
+ You Try to Run the Universe_(Tricky)
+ This May Come As a Shock_(F. Black)
+ I Wonder, Should I Get Up..._(J. Lennon)
+ I Am Testing Your Grey Matter_(Red Hot Chili Peppers)
+ Insane In Tha Membrane_(Cypress Hill)
+ You Could Make More Money As a Butcher_(F. Zappa)
+ I'll Master Your Language, and In the Meantime I'll Create My Own_(Tricky)
+ The Stingrays Must Be Fat This Year_(Red Hot Chili Peppers)
+ I'm a Wishbone and I'm Breaking_(Pixies)
+ You Think That You're Bigger When You Fill the Void_(Urban Dance Squad)
+ And It Goes a Little Something Like This_(Tag Team)
+ Kissing You is Like Kissing Gravel_(Throwing Muses)
+ You Look Better Upside Down_(Throwing Muses)
+ Lunatics On Pogo Sticks_(Red Hot Chili Peppers)
+ I Could Take You Home and Abuse You_(Magnapop)
+ Move Over Hogey Bear_(Urban Dance Squad)
+ You Leave Me Dry_(P.J. Harvey)
+ Would You Like to Be the Monster Tonight ?_(Captain Beefheart)
+ Meet Me At the Coffee Shop_(Red Hot Chili Peppers)
+ She Says She Can't Go Home Without a Chaperone_(E. Costello)
+ Keep Your Shoes and Socks On, People_(F. Zappa)
+ What If None Of Your Dreams Come True ?_(E. Costello)
+ Give a Man a Fish_(Arrested Development)
+ The Wheels On the Bus Go Round and Round_(J. Richman)
+ I Want to Know Right Now_(Meatloaf)
+ What's Your Definition Of Dirty ?_(G. Michael)
+ Here's the Way It Might End_(G. Michael)
+ Breaking the Law, Breaking the Law_(Judas Priest)
+ Just Because the Sun Wants a Place In the Sky_(F. Zappa)
+ Baseball Heroes Only_(P.J. Harvey)
+ One Cross Each_(Monty Python)
+ I Snipe Like Wesley_(Urban Dance Squad)
+ Hold On Like Cliffhanger_(Urban Dance Squad)
+ It Costs Too Much If It Costs a Lot_(Magnapop)
+ Every Sperm is Sacred_(Monty Python)
+ Everybody Lie Down On the Floor and Keep Calm_(KLF)
+ Love is Like Moby Dick, Get Chewed and Get Spat Out_(Urban Dance Squad)
+ Don't Follow Me Home_(Throwing Muses)
+ All Beauty Must Die_(Nick Cave)
+ I Don't Want to Calm Down_(Throwing Muses)
+ We're Gonna Hit You Harder_(Scoter)
+ Shake Barrels Of Whisky Down My Throat_(Throwing Muses)
+ It's Because Of the Metric System_(Pulp Fiction)
+ I Don't Want to Catch Anybody Not Drinking._(Monty Python)
+ This Doesn't Suck, It's a Black Hole !_(K.A. Feenstra)
+ Let Me Do This_(Urban Dance Squad)
+ I Can't Shake It_(Dinosaur Jr)
+ Once Again Let Me Do This_(Urban Dance Squad)
+ Pretend That You're Hercule Poirot_(TeX)
+ Exactly_(Pulp Fiction)
+ Sort Of_(Urban Dance Squad)
+ Proceed, With Fingers Crossed_(TeX)
+ The Universe is Somewhere In Here_(J.G.E.M. Fraaije)
+ You're About to Hurt Somebody_(Jazzy Jeff)
+ I Should Be the Pimp Of the Year_(Urban Dance Squad)
+ Jesus Can't Save You, Though It's Nice to Think He Tried_(Black Crowes)
+ My Heart is Just a Muscle In a Cavity_(F. Black)
+ Step Aside, Butch_(Pulp Fiction)
+ The World is a Friendly Place_(Magnapop)
+ Sometimes Life is Obscene_(Black Crowes)
+ Take Your Medications and Preparations and Ram It Up Your Snout_(F. Zappa)
+ Count the Bubbles In Your Hair_(The Breeders)
+ You Own the Sun_(Throwing Muses)
+ I Need a Little Poison_(Throwing Muses)
+ Ease Myself Into the Body Bag_(P.J. Harvey)
+ A Lady Shaves Her Legs_(C. Meijering)
+ Motherhood Means Mental Freeze_(The Breeders)
+ Correctomundo_(Pulp Fiction)
+ I Don't Like Dirt_(The Breeders)
+ Bring Out the Gimp_(Pulp Fiction)
+ You Could Be a Shadow_(The Breeders)
+ If You're So Special Why aren't You Dead ?_(The Breeders)
+ The Path Of the Righteous Man is Beset On All Sides With the Iniquities Of the Selfish and the Tyranny Of Evil Men._(Pulp Fiction)
+ Blessed is He Who In the Name Of Charity and Good Will Shepherds the Weak Through the Valley Of Darkness, For He is Truly His Brother's Keeper and the Finder Of Lost Children._(Pulp Fiction)
+ And I Will Strike Down Upon Thee With Great Vengeance and With Furious Anger Those Who Attempt to Poison and Destroy My Brothers._(Pulp Fiction)
+ And You Will Know That My Name is the Lord When I Lay My Vengeance Upon Thee._(Pulp Fiction)
+ Step On the Brakes_(2 Unlimited)
+ You Don't Wanna Know_(Pulp Fiction)
+ You Dirty Switch, You're On Again_(The Breeders)
+ She's a Good Sheila Bruce_(Monty Python)
+ I'm Gonna Get Medieval On Your Ass_(Pulp Fiction)
+ Three Little Fonzies_(Pulp Fiction)
+ It's Not Your Fault_(Pulp Fiction)
+ You Will Be Surprised At What Resides In Your Inside_(Arrested Development)
+ The Carpenter Goes Bang Bang_(The Breeders)
+ Everybody Wants to Be Naked and Famous_(Tricky)
+ Royale With Cheese_(Pulp Fiction)
+ Shit Happens_(Pulp Fiction)
+ You Fill Your Space So Sweet_(F. Apple)
+ Push It Real Good_(Salt 'n' Pepa)
+ Check Your Input_(D. Van Der Spoel)
+ Catholic School Girls Rule_(Red Hot Chili Peppers)
+ It Was My Pleasure_(Pulp Fiction)
+ We Don't Bother Anyone_(LIVE)
+ I Wrapped a Newspaper Round My Head_(F. Zappa)
+ Kick the Dog and You Will Die_(Magnapop)
+ We All Get the Flu, We All Get Aids_(LIVE)
+ One Ripple At a Time_(Bianca's Smut Shack)
+ We Have No Money_(E. Clementi)
+ Carry Me Away_(Motors)
+ I Solve Problems_(Pulp Fiction)
+ A Protein is a Set Of Coordinates_(A.P. Heiner)
+ It Doesn't Have to Be Tip Top_(Pulp Fiction)
+ Everybody's Good Enough For Some Change_(LIVE)
+ It's Against the Rules_(Pulp Fiction)
+ I'm An Oakman_(Pulp Fiction)
+ I Ripped the Cord Right Out Of the Phone_(Capt. Beefheart)
+ I Smell Smoke From a Gun Named Extinction_(Pixies)
+ With a Lead Filled Snowshoe_(F. Zappa)
+ Right Between the Eyes_(F. Zappa)
+ BioBeat is Not Available In Regular Shops_(P.J. Meulenhoff)
+ Rub It Right Accross Your Eyes_(F. Zappa)
+ Shake Yourself_(YES)
+ I Am a Wonderful Thing_(Kid Creole)
+ Way to Go Dude_(Beavis and Butthead)
+ The Microsecond is Within Reach_(P.J. Van Maaren)
+ Microsecond Here I Come_(P.J. Van Maaren)
+ Confirmed_(Star Trek)
+ If You Don't Like Cool Quotes Check Your GMXRC File_(Your Sysadmin)
+ When It Starts to Start It'll Never Stop_(Magnapop)
+ I'm a Jerk_(F. Black)
+ It Wouldn't Hurt to Wipe Once In a While_(Beavis and Butthead)
+ Welcome to the Power Age_(2 Unlimited)
+ If You See Me Getting High, Knock Me Down_(Red Hot Chili Peppers)
+ The Poodle Bites_(F. Zappa)
+ The Poodle Chews It_(F. Zappa)
+ I Got a Forty Dollar Bill_(F. Zappa)
+ We Look Pretty Sharp In These Clothes_(F. Zappa)
+ You Got to Relate to It_(A.E. Torda)
+ That Was Pretty Cool_(Beavis)
+ That Was Really Cool_(Butthead)
+ Hang On to Your Ego_(F. Black)
+ Pump Up the Volume Along With the Tempo_(Jazzy Jeff)
+ Ramones For Ever_(P.J. Van Maaren)
+ Have a Nice Day_(R. McDonald)
+ Whatever Happened to Pong ?_(F. Black)
+ Make the Floor Burn_(2 Unlimited)
+ That Was Cool_(Beavis and Butthead)
+ These Gromacs Guys Really Rock_(P.J. Meulenhoff)
+ You Hear Footsteps Coming From Behind_(Colossal Cave)
+ It is Lunchtime_(A.R. Van Buuren)
+ You Crashed Into the Swamps_(Silicon Graphics)
+ I Am a Poor Lonesome Cowboy_(Lucky Luke)
+ Clickety Clickety Click_(System Manager From Hell)
+ Been There, Done It_(Beavis and Butthead)
+ Load Up Your Rubber Bullets_(10 CC)
+ How Do You Like Your Vacation So Far ?_(Speed 2 - Cruise Control)
+ It's So Fast It's Slow_(F. Black)
+ Ich Bin Ein Berliner_(J.F. Kennedy)
+ Take Dehydrated Water On Your Desert Trips_(Space Quest III)
+ Your Country Needs YOU_(U.S. Army)
+ Don't Eat That Yellow Snow_(F. Zappa)
+ I Do It All the Time_(Magnapop)
+ Just Give Me a Blip_(F. Black)
+ Garbage Collecting..._(GNU Emacs)
+ Cut It Deep and Cut It Wide_(The Walkabouts)
+ Beat On the Brat With a Baseball Bat_(The Ramones)
+ My Head Goes Pop Pop Pop Pop Pop_(F. Black)
+ Hangout In the Suburbs If You've Got the Guts_(Urban Dance Squad)
+ I Have a Bad Case Of Purple Diarrhea_(Urban Dance Squad)
+ It's Bicycle Repair Man !_(Monty Python)
+ I've Got Two Turntables and a Microphone_(B. Hansen)
+ I Am the Psychotherapist. Please, Describe Your Problems._(GNU Emacs)
+ Watch Out Where the Huskies Go_(F. Zappa)
+ I Was Born to Have Adventure_(F. Zappa)
+ Is That a Real Poncho ?_(F. Zappa)
+ They're Red Hot_(Red Hot Chili Peppers)
+ Your Bones Got a Little Machine_(Pixies)
+ Oh My God ! It's the Funky Shit_(Beastie Boys)
+ Throwing the Baby Away With the SPC_(S. Hayward)
+ Engage_(J.L. Picard)
+ Everybody is Smashing Things Down_(Offspring)
+ Hey Man You Know, I'm Really OK_(Offspring)
+ I'm Not Gonna Die Here !_(Sphere)
+ I'd Like Monday Mornings Better If They Started Later_(Garfield)
+ Here's Another Useful Quote_(S. Boot)
+ Wild Pointers Couldn't Drag Me Away_(K.A. Feenstra)
+ Let's Go Hang Out In a Mall_(LIVE)
+ These are Ideas, They are Not Lies_(Magnapop)
+ Bad As This Shit Is, This Shit Ain't As Bad As You Think It Is._(Jackie Brown)
+ My Ass May Be Dumb, But I Ain't No Dumbass._(Jackie Brown)
+ Jesus Not Only Saves, He Also Frequently Makes Backups._(Myron Bradshaw)
+ Player Sleeps With the Fishes_(Ein Bekanntes Spiel Von ID Software)
+ Bailed Out Of Edge Synchronization After 10,000 Iterations_(X/Motif)
+ God is a DJ_(Faithless)
+ Encountered Subspace Anomaly_(Star Trek)
+ If I Were You I Would Give Me a Break_(F. Black)
+ She Needs Cash to Buy Aspirine For Her Pain_(LIVE)
+ Got Coffee, Got Donuts, Got Wasted_(F. Black)
+ Boom Boom Boom Boom, I Want You in My Room_(Venga Boys)
+ Right Now My Job is Eating These Doughnuts_(Bodycount)
+ Wait a Minute, aren't You.... ? (gunshots) Yeah._(Bodycount)
+ If I Wanted You to Understand This, I Would Explain it Better_(J. Cruijff)
+ Uh-oh_(Tinky Winky)
+ Uh-oh, We're In Trouble_(Shampoo)
+ Can't You Make This Thing Go Faster ?_(Black Crowes)
+ Get Down In 3D_(George Clinton)
+ Uh-oh .... Right Again_(Laurie Anderson)
+ (That makes 100 errors; please try again.)_(TeX)
+ O My God, They Killed Kenny !_(South Park)
+ Drugs are Bad, mmokay_(South Park)
+ Let's Unzip And Let's Unfold_(Red Hot Chili Peppers)
+ I'd Be Water If I Could_(Red Hot Chili Peppers)
+ Space May Be the Final Frontier, But It's Made in a Hollywood Basement_(Red Hot Chili Peppers)
+ Everything Must Go_(Red Hot Chili Peppers)
+ There's Nothing We Can't Fix, 'coz We Can Do It in the Mix_(Indeep)
+ It's Coming Right For Us !_(South Park)
+ Disturb the Peace of a John Q Citizen_(Urban Dance Squad)
+ Wicky-wicky Wa-wild West_(Will Smith)
+ This is Tense !_(Star Wars Episode I The Phantom Menace)
+ Fly to the Court of England and Unfold_(Macbeth, Act 3, Scene 6, William Shakespeare)
+ Why, how now, Claudio ! Whence Comes this Restraint ?_(Lucio in Measure for measure, Act 1, Scene 4, William Shakespeare)
+ In the End Science Comes Down to Praying_(P. v.d. Berg)
+ I'm Looking for a New Simulation_(Stone Temple Pilots)
+ I Quit My Job Blowing Leaves_(Beck)
+ Live for Liposuction_(Robbie Williams)
+ All You Need is Greed_(Aztec Camera)
+ You Can Be Too Early, You Can Be Too Late and You Can Be On Time_(J. Cruijff)
+ RTFM_(B. Hess)
+ Why Do *You* Use Constraints ?_(H.J.C. Berendsen)
+ Why Weren't You at My Funeral ?_(G. Groenhof)
+ You Can Always Go On Ricky Lake_(Offspring)
+ As Always Your Logic Is Impeccable_(Tuvok)
+ set: No match._(tcsh)
+ AH ....Satisfaction_(IRIX imapd)
+ I Need Love, Not Games_(Iggy Pop & Kate Pierson)
+ It's Not Dark Yet, But It's Getting There_(Bob Dylan)
+ I Used To Care, But Things Have Changed_(Bob Dylan)
+ Working in the Burger Kings, Spitting on your Onion Rings_(Slim Shady)
+ Does All This Money Really Have To Go To Charity ?_(Rick)
+ Yeah, uh uh, Neil's Head !_(Neil)
+ In the Meantime, Take Care of Yourself aaand Eachother_(J. Springer)
+ I Feel a Great Disturbance in the Force_(The Emperor Strikes Back)
+ Do You Have a Mind of Your Own ?_(Garbage)
+ I'll Match Your DNA_(Red Hot Chili Peppers)
+ All I Ever Wanted Was Your Life_(Red Hot Chili Peppers)
+ Just a Minute While I Reinvent Myself_(Red Hot Chili Peppers)
+ There's Still Time to Change the Rope You're On_(Led Zeppelin)
+ Baby, It Aint Over Till It's Over_(Lenny Kravitz)
+ It Just Tastes Better_(Burger King)
+ 'Nay. We are but men.' Rock!_(Tenacious D)
+ Cowardly refusing to create an empty archive_(GNU tar)
+ Shaken, not Stirred_(J. Bond)
+ Oh, There Goes Gravity_(Eminem)
+ Is This the Right Room for an Argument ?_(Monty Python)
+ I was detained, I was restrained_(The Smiths)
+ The Candlelight Was Just Right_(Beastie Boys)
+ Fresh Air, Green Hair_(Frank Black)
+ Rat-tat-tat Ka boom boom_(The Smashing Pumpkins)
+ Youth is wasted on the young_(The Smashing Pumpkins)
+ Miggida-Miggida-Miggida-Mac_(Kriss Kross)
+ Interfacing Space and Beyond..._(P. J. Harvey)
+ Everything He Lacks, He Makes Up In Denial_(Offspring)
+ A Pretty Village Burning Makes a Pretty Fire_(David Sandstrom)
+ They don't have any beavers in India, so they have to simulate them_(The Tubes)
+ It's Calling Me to Break my Bonds, Again..._(Van der Graaf)
+ I believe in miracles cause I'm one_(The Ramones)
+ Gabba Gabba Hey!_(The Ramones)
+ Shoot them in the back now_(The Ramones)
+ Read me your scripture and I will twist it_(Red Hot Chili Peppers)
+ Good Music Saves your Soul_(Lemmy)
+ I believe in miracles cause I'm one_(The Ramones)
+ Gabba Gabba Hey!_(The Ramones)
+ Good Music Saves your Soul_(Lemmy)
+ Move about like a Scientist, lay down, get kissed_(Red Hot Chili Peppers)
+ California, R.I.P._(Red Hot Chili Peppers)
+ Don't You Wish You Never Met Her, Dirty Blue Gene?_(Captain Beefheart)
+ Nobody Never Learnt No-Nothing from No History_(Gogol Bordello)
+ I'd be Safe and Warm if I was in L.A._(The Mamas and the Papas)
+ It's Unacceptable That Chocolate Makes You Fat_(MI 3)
+ My Brothers are Protons (Protons!), My Sisters are Neurons (Neurons)_(Gogol Bordello)
+ Put Me Inside SSC, Let's Test Superstring Theory, Oh Yoi Yoi Accelerate the Protons_(Gogol Bordello)
+ Do You Have Sex Maniacs or Schizophrenics or Astrophysicists in Your Family?_(Gogol Bordello)
+ Screw a Lightbulb in your Head_(Gogol Bordello)
+ Alas, You're Welcome_(Prof. Dumbledore in Potter Puppet Pals)
+ Your Shopping Techniques are Amazing_(Gogol Bordello)
+ Your Country Raised You, Your Country Fed You, and Just Like Any Other Country it Will Break You_(Gogol Bordello)
+ What They Need's a Damn Good Whacking_(The Beatles)
+ They Paint Their Faces So Differently From Ours_(Gogol Bordello)
+ The Feeling of Power was Intoxicating, Magic_(Frida Hyvonen)
+ I was elected to lead, not to read_(President A. Schwarzenegger)
+ I managed to get two hours of work done before work_(E. Lindahl)
+ Go back to the rock from under which you came_(Fiona Apple)
+ It's just the way this stuff is done_(Built to Spill)
+ You Fill Me With Inertia_(The Long Blondes)
+ I used to be blond and stupid, but now I dyed it black_(Miss Li)
+ Aber wenn der Quarterback kommt, um dir die Brille abzunehmen Sag ihm: Danke die bleibt wo sie ist_(Wir sind Helden)
+ Jede der Scherben spiegelt das Licht_(Wir sind Helden)
+ Ohne Arbeit wird das Leben Oede_(Wir Sind Helden)
+ Act like Prometheus would_(Gogol Bordello)
+ Making merry out of nothing, like in refugee camp_(Gogol Bordello)
+ History has expired_(PubMed Central)
+ There's only music to make new ringtones_(Arctic Monkeys)
+ Can someone please tell Icarus that he's not the only one falling from the sky?_(Urban Dance Squad)
+ Ich war schwanger, mir gings zum kotzen_(Nina Hagen)
+ What if you're wrong about the great Ju Ju at the bottom of the sea?_(Richard Dawkins)
+ Come on boys, Let's push it hard_(P.J. Harvey)
+ Look at these, my work-strong arms_(P.J. Harvey)
+ Is it the invisible chemistry stuff?_(Frida Hyvonen)
+ Nada e organico, e tudo programado_(Pitty)
+ Sitting on a rooftop watching molecules collide_(A Camp)
+ Though the path of the comet is sure, its constitution is not_(Peter Hammill)
+ Everything's formed from particles_(Van der Graaf Generator)
+ The time for theory is over_(J. Hajdu)
+ What's the point, yo, what's the spread?_(Red Hot Chili Peppers)
+ If There Is No Guitar In The House, You Know Its Owner Can Not Be Trusted_(Gogol Bordello)
+ Carbohydrates is all they groove_(Frank Zappa)
+ Never, I said never, compare with experiment_(Magnus Bergh)
+ Suzy is a headbanger, her mother is a geek_(The Ramones)
+ Now it's filled with hundreds and hundreds of chemicals_(Midlake)
+ If it weren't for bad luck, we'd have no luck at all_(The Unthanks)
+ There's no way you can rely on an experiment_(Gerrit Groenhof)
+ I like to wait, then I feel like I do something_(Carl Caleman)
+ Can I have everything louder than everything else?_(Deep Purple)
+ He's using code that only you and I know_(Kate Bush)
+ Chemical gases filling lungs of little ones_(Black Eyed Peas)
+ I've basically become a vegetarian since the only meat I'm eating is from animals I've killed myself_(Mark Zuckerberg)
++Years of calculations and the stress, My science is waiting, nearly complete_(Midlake)
index b875dabb5fda5f6e41a4eec4badb9b01546ec2dd,0000000000000000000000000000000000000000..1a9a61bb7cd2497971e8704f24229a2c1b164543
mode 100644,000000..100644
--- /dev/null
@@@ -1,797 -1,0 +1,785 @@@
- void f(char *a)
- {
-     int i;
-     int len = strlen(a);
-     for (i = 0; i < len; i++)
-     {
-         a[i] = ~a[i];
-     }
- }
 +/*
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + *          GROningen MAchine for Chemical Simulations
 + *
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + *
 + * For more info, check our website at http://www.gromacs.org
 + *
 + * And Hey:
 + * GROningen Mixture of Alchemy and Childrens' Stories
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#ifdef GMX_THREAD_MPI
 +#include <thread_mpi.h>
 +#endif
 +
 +#ifdef HAVE_LIBMKL
 +#include <mkl.h>
 +#endif
 +#ifdef GMX_FFT_FFTW3
 +#include <fftw3.h>
 +#endif
 +
 +/* This file is completely threadsafe - keep it that way! */
 +
 +#include <string.h>
 +#include <ctype.h>
 +#include "sysstuff.h"
 +#include "smalloc.h"
 +#include "string2.h"
 +#include "macros.h"
 +#include <time.h>
 +#include "random.h"
 +#include "statutil.h"
 +#include "copyrite.h"
 +#include "strdb.h"
 +#include "futil.h"
 +#include "vec.h"
 +#include "buildinfo.h"
 +#include "gmx_cpuid.h"
 +
 +static void pr_two(FILE *out, int c, int i)
 +{
 +    if (i < 10)
 +    {
 +        fprintf(out, "%c0%1d", c, i);
 +    }
 +    else
 +    {
 +        fprintf(out, "%c%2d", c, i);
 +    }
 +}
 +
 +void pr_difftime(FILE *out, double dt)
 +{
 +    int        ndays, nhours, nmins, nsecs;
 +    gmx_bool   bPrint, bPrinted;
 +
 +    ndays    = dt/(24*3600);
 +    dt       = dt-24*3600*ndays;
 +    nhours   = dt/3600;
 +    dt       = dt-3600*nhours;
 +    nmins    = dt/60;
 +    dt       = dt-nmins*60;
 +    nsecs    = dt;
 +    bPrint   = (ndays > 0);
 +    bPrinted = bPrint;
 +    if (bPrint)
 +    {
 +        fprintf(out, "%d", ndays);
 +    }
 +    bPrint = bPrint || (nhours > 0);
 +    if (bPrint)
 +    {
 +        if (bPrinted)
 +        {
 +            pr_two(out, 'd', nhours);
 +        }
 +        else
 +        {
 +            fprintf(out, "%d", nhours);
 +        }
 +    }
 +    bPrinted = bPrinted || bPrint;
 +    bPrint   = bPrint || (nmins > 0);
 +    if (bPrint)
 +    {
 +        if (bPrinted)
 +        {
 +            pr_two(out, 'h', nmins);
 +        }
 +        else
 +        {
 +            fprintf(out, "%d", nmins);
 +        }
 +    }
 +    bPrinted = bPrinted || bPrint;
 +    if (bPrinted)
 +    {
 +        pr_two(out, ':', nsecs);
 +    }
 +    else
 +    {
 +        fprintf(out, "%ds", nsecs);
 +    }
 +    fprintf(out, "\n");
 +}
 +
 +
 +gmx_bool be_cool(void)
 +{
 +    /* Yes, it is bad to check the environment variable every call,
 +     * but we dont call this routine often, and it avoids using
 +     * a mutex for locking the variable...
 +     */
 +#ifdef GMX_COOL_QUOTES
 +    return (getenv("GMX_NO_QUOTES") == NULL);
 +#else
 +    /*be uncool*/
 +    return FALSE;
 +#endif
 +}
 +
 +void space(FILE *out, int n)
 +{
 +    fprintf(out, "%*s", n, "");
 +}
 +
-         f(retstring);
 +static void sp_print(FILE *out, const char *s)
 +{
 +    int slen;
 +
 +    slen = strlen(s);
 +    space(out, (80-slen)/2);
 +    fprintf(out, "%s\n", s);
 +}
 +
 +static void ster_print(FILE *out, const char *s)
 +{
 +    int  slen;
 +    char buf[128];
 +
 +    snprintf(buf, 128, ":-)  %s  (-:", s);
 +    slen = strlen(buf);
 +    space(out, (80-slen)/2);
 +    fprintf(out, "%s\n", buf);
 +}
 +
 +
 +static void pukeit(const char *db, const char *defstring, char *retstring,
 +                   int retsize, int *cqnum)
 +{
 +    FILE  *fp;
 +    char **help;
 +    int    i, nhlp;
 +    int    seed;
 +
 +    if (be_cool() && ((fp = low_libopen(db, FALSE)) != NULL))
 +    {
 +        nhlp = fget_lines(fp, &help);
 +        /* for libraries we can use the low-level close routines */
 +        ffclose(fp);
 +        seed   = time(NULL);
 +        *cqnum = nhlp*rando(&seed);
 +        if (strlen(help[*cqnum]) >= STRLEN)
 +        {
 +            help[*cqnum][STRLEN-1] = '\0';
 +        }
 +        strncpy(retstring, help[*cqnum], retsize);
 +        for (i = 0; (i < nhlp); i++)
 +        {
 +            sfree(help[i]);
 +        }
 +        sfree(help);
 +    }
 +    else
 +    {
 +        strncpy(retstring, defstring, retsize);
 +    }
 +}
 +
 +void bromacs(char *retstring, int retsize)
 +{
 +    int dum;
 +
 +    pukeit("bromacs.dat",
 +           "Groningen Machine for Chemical Simulation",
 +           retstring, retsize, &dum);
 +}
 +
 +void cool_quote(char *retstring, int retsize, int *cqnum)
 +{
 +    char *tmpstr;
 +    char *s, *ptr;
 +    int   tmpcq, *p;
 +
 +    if (cqnum != NULL)
 +    {
 +        p = cqnum;
 +    }
 +    else
 +    {
 +        p = &tmpcq;
 +    }
 +
 +    /* protect audience from explicit lyrics */
 +    snew(tmpstr, retsize+1);
 +    pukeit("gurgle.dat", "Thanx for Using GROMACS - Have a Nice Day",
 +           tmpstr, retsize-2, p);
 +
 +    if ((ptr = strchr(tmpstr, '_')) != NULL)
 +    {
 +        *ptr = '\0';
 +        ptr++;
 +        sprintf(retstring, "\"%s\" %s", tmpstr, ptr);
 +    }
 +    else
 +    {
 +        strcpy(retstring, tmpstr);
 +    }
 +    sfree(tmpstr);
 +}
 +
 +void CopyRight(FILE *out, const char *szProgram)
 +{
 +    static const char * CopyrightText[] = {
 +        "Written by Emile Apol, Rossen Apostolov, Herman J.C. Berendsen,",
 +        "Aldert van Buuren, Pär Bjelkmar, Rudi van Drunen, Anton Feenstra, ",
 +        "Gerrit Groenhof, Peter Kasson, Per Larsson, Pieter Meulenhoff, ",
 +        "Teemu Murtola, Szilard Pall, Sander Pronk, Roland Schulz, ",
 +        "Michael Shirts, Alfons Sijbers, Peter Tieleman,\n",
 +        "Berk Hess, David van der Spoel, and Erik Lindahl.\n",
 +        "Copyright (c) 1991-2000, University of Groningen, The Netherlands.",
 +        "Copyright (c) 2001-2010, The GROMACS development team at",
 +        "Uppsala University & The Royal Institute of Technology, Sweden.",
 +        "check out http://www.gromacs.org for more information.\n"
 +    };
 +
 +    static const char * LicenseText[] = {
 +        "This program is free software; you can redistribute it and/or",
 +        "modify it under the terms of the GNU Lesser General Public License",
 +        "as published by the Free Software Foundation; either version 2.1",
 +        "of the License, or (at your option) any later version."
 +    };
 +
 +    /* Dont change szProgram arbitrarily - it must be argv[0], i.e. the
 +     * name of a file. Otherwise, we won't be able to find the library dir.
 +     */
 +#define NCR (int)asize(CopyrightText)
 +/* TODO: Is this exception still needed? */
 +#ifdef GMX_FAHCORE
 +#define NLICENSE 0 /*FAH has an exception permission from GPL to allow digital signatures in Gromacs*/
 +#else
 +#define NLICENSE (int)asize(LicenseText)
 +#endif
 +
 +    char buf[256], tmpstr[1024];
 +    int  i;
 +
 +#ifdef GMX_FAHCORE
 +    set_program_name("Gromacs");
 +#else
 +    set_program_name(szProgram);
 +#endif
 +
 +    ster_print(out, "G  R  O  M  A  C  S");
 +    fprintf(out, "\n");
 +
 +    bromacs(tmpstr, 1023);
 +    sp_print(out, tmpstr);
 +    fprintf(out, "\n");
 +
 +    ster_print(out, GromacsVersion());
 +    fprintf(out, "\n");
 +
 +    /* fprintf(out,"\n");*/
 +
 +    /* sp_print(out,"PLEASE NOTE: THIS IS A BETA VERSION\n");
 +
 +       fprintf(out,"\n"); */
 +
 +    for (i = 0; (i < NCR); i++)
 +    {
 +        sp_print(out, CopyrightText[i]);
 +    }
 +    for (i = 0; (i < NLICENSE); i++)
 +    {
 +        sp_print(out, LicenseText[i]);
 +    }
 +
 +    fprintf(out, "\n");
 +
 +    snprintf(buf, 256, "%s", Program());
 +#ifdef GMX_DOUBLE
 +    strcat(buf, " (double precision)");
 +#endif
 +    ster_print(out, buf);
 +    fprintf(out, "\n");
 +}
 +
 +
 +void thanx(FILE *fp)
 +{
 +    char cq[1024];
 +    int  cqnum;
 +
 +    /* protect the audience from suggestive discussions */
 +    cool_quote(cq, 1023, &cqnum);
 +
 +    if (be_cool())
 +    {
 +        fprintf(fp, "\ngcq#%d: %s\n\n", cqnum, cq);
 +    }
 +    else
 +    {
 +        fprintf(fp, "\n%s\n\n", cq);
 +    }
 +}
 +
 +typedef struct {
 +    const char *key;
 +    const char *author;
 +    const char *title;
 +    const char *journal;
 +    int         volume, year;
 +    const char *pages;
 +} t_citerec;
 +
 +void please_cite(FILE *fp, const char *key)
 +{
 +    static const t_citerec citedb[] = {
 +        { "Allen1987a",
 +          "M. P. Allen and D. J. Tildesley",
 +          "Computer simulation of liquids",
 +          "Oxford Science Publications",
 +          1, 1987, "1" },
 +        { "Berendsen95a",
 +          "H. J. C. Berendsen, D. van der Spoel and R. van Drunen",
 +          "GROMACS: A message-passing parallel molecular dynamics implementation",
 +          "Comp. Phys. Comm.",
 +          91, 1995, "43-56" },
 +        { "Berendsen84a",
 +          "H. J. C. Berendsen, J. P. M. Postma, A. DiNola and J. R. Haak",
 +          "Molecular dynamics with coupling to an external bath",
 +          "J. Chem. Phys.",
 +          81, 1984, "3684-3690" },
 +        { "Ryckaert77a",
 +          "J. P. Ryckaert and G. Ciccotti and H. J. C. Berendsen",
 +          "Numerical Integration of the Cartesian Equations of Motion of a System with Constraints; Molecular Dynamics of n-Alkanes",
 +          "J. Comp. Phys.",
 +          23, 1977, "327-341" },
 +        { "Miyamoto92a",
 +          "S. Miyamoto and P. A. Kollman",
 +          "SETTLE: An Analytical Version of the SHAKE and RATTLE Algorithms for Rigid Water Models",
 +          "J. Comp. Chem.",
 +          13, 1992, "952-962" },
 +        { "Cromer1968a",
 +          "D. T. Cromer & J. B. Mann",
 +          "X-ray scattering factors computed from numerical Hartree-Fock wave functions",
 +          "Acta Cryst. A",
 +          24, 1968, "321" },
 +        { "Barth95a",
 +          "E. Barth and K. Kuczera and B. Leimkuhler and R. D. Skeel",
 +          "Algorithms for Constrained Molecular Dynamics",
 +          "J. Comp. Chem.",
 +          16, 1995, "1192-1209" },
 +        { "Essmann95a",
 +          "U. Essmann, L. Perera, M. L. Berkowitz, T. Darden, H. Lee and L. G. Pedersen ",
 +          "A smooth particle mesh Ewald method",
 +          "J. Chem. Phys.",
 +          103, 1995, "8577-8592" },
 +        { "Torda89a",
 +          "A. E. Torda and R. M. Scheek and W. F. van Gunsteren",
 +          "Time-dependent distance restraints in molecular dynamics simulations",
 +          "Chem. Phys. Lett.",
 +          157, 1989, "289-294" },
 +        { "Tironi95a",
 +          "I. G. Tironi and R. Sperb and P. E. Smith and W. F. van Gunsteren",
 +          "Generalized reaction field method for molecular dynamics simulations",
 +          "J. Chem. Phys",
 +          102, 1995, "5451-5459" },
 +        { "Hess97a",
 +          "B. Hess and H. Bekker and H. J. C. Berendsen and J. G. E. M. Fraaije",
 +          "LINCS: A Linear Constraint Solver for molecular simulations",
 +          "J. Comp. Chem.",
 +          18, 1997, "1463-1472" },
 +        { "Hess2008a",
 +          "B. Hess",
 +          "P-LINCS: A Parallel Linear Constraint Solver for molecular simulation",
 +          "J. Chem. Theory Comput.",
 +          4, 2008, "116-122" },
 +        { "Hess2008b",
 +          "B. Hess and C. Kutzner and D. van der Spoel and E. Lindahl",
 +          "GROMACS 4: Algorithms for highly efficient, load-balanced, and scalable molecular simulation",
 +          "J. Chem. Theory Comput.",
 +          4, 2008, "435-447" },
 +        { "Hub2010",
 +          "J. S. Hub, B. L. de Groot and D. van der Spoel",
 +          "g_wham - A free weighted histogram analysis implementation including robust error and autocorrelation estimates",
 +          "J. Chem. Theory Comput.",
 +          6, 2010, "3713-3720"},
 +        { "In-Chul99a",
 +          "Y. In-Chul and M. L. Berkowitz",
 +          "Ewald summation for systems with slab geometry",
 +          "J. Chem. Phys.",
 +          111, 1999, "3155-3162" },
 +        { "DeGroot97a",
 +          "B. L. de Groot and D. M. F. van Aalten and R. M. Scheek and A. Amadei and G. Vriend and H. J. C. Berendsen",
 +          "Prediction of Protein Conformational Freedom From Distance Constrains",
 +          "Proteins",
 +          29, 1997, "240-251" },
 +        { "Spoel98a",
 +          "D. van der Spoel and P. J. van Maaren and H. J. C. Berendsen",
 +          "A systematic study of water models for molecular simulation. Derivation of models optimized for use with a reaction-field.",
 +          "J. Chem. Phys.",
 +          108, 1998, "10220-10230" },
 +        { "Wishart98a",
 +          "D. S. Wishart and A. M. Nip",
 +          "Protein Chemical Shift Analysis: A Practical Guide",
 +          "Biochem. Cell Biol.",
 +          76, 1998, "153-163" },
 +        { "Maiorov95",
 +          "V. N. Maiorov and G. M. Crippen",
 +          "Size-Independent Comparison of Protein Three-Dimensional Structures",
 +          "PROTEINS: Struct. Funct. Gen.",
 +          22, 1995, "273-283" },
 +        { "Feenstra99",
 +          "K. A. Feenstra and B. Hess and H. J. C. Berendsen",
 +          "Improving Efficiency of Large Time-scale Molecular Dynamics Simulations of Hydrogen-rich Systems",
 +          "J. Comput. Chem.",
 +          20, 1999, "786-798" },
 +        { "Timneanu2004a",
 +          "N. Timneanu and C. Caleman and J. Hajdu and D. van der Spoel",
 +          "Auger Electron Cascades in Water and Ice",
 +          "Chem. Phys.",
 +          299, 2004, "277-283" },
 +        { "Pascal2011a",
 +          "T. A. Pascal and S. T. Lin and W. A. Goddard III",
 +          "Thermodynamics of liquids: standard molar entropies and heat capacities of common solvents from 2PT molecular dynamics",
 +          "Phys. Chem. Chem. Phys.",
 +          13, 2011, "169-181" },
 +        { "Caleman2011b",
 +          "C. Caleman and P. J. van Maaren and M. Hong and J. S. Hub and L. T. da Costa and D. van der Spoel",
 +          "Force Field Benchmark of Organic Liquids: Density, Enthalpy of Vaporization, Heat Capacities, Surface Tension, Isothermal Compressibility, Volumetric Expansion Coefficient, and Dielectric Constant",
 +          "J. Chem. Theo. Comp.",
 +          8, 2012, "61" },
 +        { "Lindahl2001a",
 +          "E. Lindahl and B. Hess and D. van der Spoel",
 +          "GROMACS 3.0: A package for molecular simulation and trajectory analysis",
 +          "J. Mol. Mod.",
 +          7, 2001, "306-317" },
 +        { "Wang2001a",
 +          "J. Wang and W. Wang and S. Huo and M. Lee and P. A. Kollman",
 +          "Solvation model based on weighted solvent accessible surface area",
 +          "J. Phys. Chem. B",
 +          105, 2001, "5055-5067" },
 +        { "Eisenberg86a",
 +          "D. Eisenberg and A. D. McLachlan",
 +          "Solvation energy in protein folding and binding",
 +          "Nature",
 +          319, 1986, "199-203" },
 +        { "Eisenhaber95",
 +          "Frank Eisenhaber and Philip Lijnzaad and Patrick Argos and Chris Sander and Michael Scharf",
 +          "The Double Cube Lattice Method: Efficient Approaches to Numerical Integration of Surface Area and Volume and to Dot Surface Contouring of Molecular Assemblies",
 +          "J. Comp. Chem.",
 +          16, 1995, "273-284" },
 +        { "Hess2002",
 +          "B. Hess, H. Saint-Martin and H.J.C. Berendsen",
 +          "Flexible constraints: an adiabatic treatment of quantum degrees of freedom, with application to the flexible and polarizable MCDHO model for water",
 +          "J. Chem. Phys.",
 +          116, 2002, "9602-9610" },
 +        { "Hetenyi2002b",
 +          "Csaba Hetenyi and David van der Spoel",
 +          "Efficient docking of peptides to proteins without prior knowledge of the binding site.",
 +          "Prot. Sci.",
 +          11, 2002, "1729-1737" },
 +        { "Hess2003",
 +          "B. Hess and R.M. Scheek",
 +          "Orientation restraints in molecular dynamics simulations using time and ensemble averaging",
 +          "J. Magn. Res.",
 +          164, 2003, "19-27" },
 +        { "Rappe1991a",
 +          "A. K. Rappe and W. A. Goddard III",
 +          "Charge Equillibration for Molecular Dynamics Simulations",
 +          "J. Phys. Chem.",
 +          95, 1991, "3358-3363" },
 +        { "Mu2005a",
 +          "Y. Mu, P. H. Nguyen and G. Stock",
 +          "Energy landscape of a small peptide revelaed by dihedral angle principal component analysis",
 +          "Prot. Struct. Funct. Bioinf.",
 +          58, 2005, "45-52" },
 +        { "Okabe2001a",
 +          "T. Okabe and M. Kawata and Y. Okamoto and M. Mikami",
 +          "Replica-exchange {M}onte {C}arlo method for the isobaric-isothermal ensemble",
 +          "Chem. Phys. Lett.",
 +          335, 2001, "435-439" },
 +        { "Hukushima96a",
 +          "K. Hukushima and K. Nemoto",
 +          "Exchange Monte Carlo Method and Application to Spin Glass Simulations",
 +          "J. Phys. Soc. Jpn.",
 +          65, 1996, "1604-1608" },
 +        { "Tropp80a",
 +          "J. Tropp",
 +          "Dipolar Relaxation and Nuclear Overhauser effects in nonrigid molecules: The effect of fluctuating internuclear distances",
 +          "J. Chem. Phys.",
 +          72, 1980, "6035-6043" },
 +        { "Bultinck2002a",
 +          "P. Bultinck and W. Langenaeker and P. Lahorte and F. De Proft and P. Geerlings and M. Waroquier and J. P. Tollenaere",
 +          "The electronegativity equalization method I: Parametrization and validation for atomic charge calculations",
 +          "J. Phys. Chem. A",
 +          106, 2002, "7887-7894" },
 +        { "Yang2006b",
 +          "Q. Y. Yang and K. A. Sharp",
 +          "Atomic charge parameters for the finite difference Poisson-Boltzmann method using electronegativity neutralization",
 +          "J. Chem. Theory Comput.",
 +          2, 2006, "1152-1167" },
 +        { "Spoel2005a",
 +          "D. van der Spoel, E. Lindahl, B. Hess, G. Groenhof, A. E. Mark and H. J. C. Berendsen",
 +          "GROMACS: Fast, Flexible and Free",
 +          "J. Comp. Chem.",
 +          26, 2005, "1701-1719" },
 +        { "Spoel2006b",
 +          "D. van der Spoel, P. J. van Maaren, P. Larsson and N. Timneanu",
 +          "Thermodynamics of hydrogen bonding in hydrophilic and hydrophobic media",
 +          "J. Phys. Chem. B",
 +          110, 2006, "4393-4398" },
 +        { "Spoel2006d",
 +          "D. van der Spoel and M. M. Seibert",
 +          "Protein folding kinetics and thermodynamics from atomistic simulations",
 +          "Phys. Rev. Letters",
 +          96, 2006, "238102" },
 +        { "Palmer94a",
 +          "B. J. Palmer",
 +          "Transverse-current autocorrelation-function calculations of the shear viscosity for molecular liquids",
 +          "Phys. Rev. E",
 +          49, 1994, "359-366" },
 +        { "Bussi2007a",
 +          "G. Bussi, D. Donadio and M. Parrinello",
 +          "Canonical sampling through velocity rescaling",
 +          "J. Chem. Phys.",
 +          126, 2007, "014101" },
 +        { "Hub2006",
 +          "J. S. Hub and B. L. de Groot",
 +          "Does CO2 permeate through Aquaporin-1?",
 +          "Biophys. J.",
 +          91, 2006, "842-848" },
 +        { "Hub2008",
 +          "J. S. Hub and B. L. de Groot",
 +          "Mechanism of selectivity in aquaporins and aquaglyceroporins",
 +          "PNAS",
 +          105, 2008, "1198-1203" },
 +        { "Friedrich2009",
 +          "M. S. Friedrichs, P. Eastman, V. Vaidyanathan, M. Houston, S. LeGrand, A. L. Beberg, D. L. Ensign, C. M. Bruns, and V. S. Pande",
 +          "Accelerating Molecular Dynamic Simulation on Graphics Processing Units",
 +          "J. Comp. Chem.",
 +          30, 2009, "864-872" },
 +        { "Engin2010",
 +          "O. Engin, A. Villa, M. Sayar and B. Hess",
 +          "Driving Forces for Adsorption of Amphiphilic Peptides to Air-Water Interface",
 +          "J. Phys. Chem. B",
 +          114, 2010, "11093" },
 +        { "Fritsch12",
 +          "S. Fritsch, C. Junghans and K. Kremer",
 +          "Adaptive molecular simulation study on structure formation of toluene around C60 using Gromacs",
 +          "J. Chem. Theo. Comp.",
 +          8, 2012, "398" },
 +        { "Junghans10",
 +          "C. Junghans and S. Poblete",
 +          "A reference implementation of the adaptive resolution scheme in ESPResSo",
 +          "Comp. Phys. Comm.",
 +          181, 2010, "1449" },
 +        { "Wang2010",
 +          "H. Wang, F. Dommert, C.Holm",
 +          "Optimizing working parameters of the smooth particle mesh Ewald algorithm in terms of accuracy and efficiency",
 +          "J. Chem. Phys. B",
 +          133, 2010, "034117" },
 +        { "Sugita1999a",
 +          "Y. Sugita, Y. Okamoto",
 +          "Replica-exchange molecular dynamics method for protein folding",
 +          "Chem. Phys. Lett.",
 +          314, 1999, "141-151" },
 +        { "Kutzner2011",
 +          "C. Kutzner and J. Czub and H. Grubmuller",
 +          "Keep it Flexible: Driving Macromolecular Rotary Motions in Atomistic Simulations with GROMACS",
 +          "J. Chem. Theory Comput.",
 +          7, 2011, "1381-1393" },
 +        { "Hoefling2011",
 +          "M. Hoefling, N. Lima, D. Haenni, C.A.M. Seidel, B. Schuler, H. Grubmuller",
 +          "Structural Heterogeneity and Quantitative FRET Efficiency Distributions of Polyprolines through a Hybrid Atomistic Simulation and Monte Carlo Approach",
 +          "PLoS ONE",
 +          6, 2011, "e19791" },
 +        { "Hockney1988",
 +          "R. W. Hockney and J. W. Eastwood",
 +          "Computer simulation using particles",
 +          "IOP, Bristol",
 +          1, 1988, "1" },
 +        { "Ballenegger2012",
 +          "V. Ballenegger, J.J. Cerda, and C. Holm",
 +          "How to Convert SPME to P3M: Influence Functions and Error Estimates",
 +          "J. Chem. Theory Comput.",
 +          8, 2012, "936-947" },
 +        { "Garmay2012",
 +          "Garmay Yu, Shvetsov A, Karelov D, Lebedev D, Radulescu A, Petukhov M, Isaev-Ivanov V",
 +          "Correlated motion of protein subdomains and large-scale conformational flexibility of RecA protein filament",
 +          "Journal of Physics: Conference Series",
 +          340, 2012, "012094" }
 +    };
 +#define NSTR (int)asize(citedb)
 +
 +    int   j, index;
 +    char *author;
 +    char *title;
 +#define LINE_WIDTH 79
 +
 +    if (fp == NULL)
 +    {
 +        return;
 +    }
 +
 +    for (index = 0; (index < NSTR) && (strcmp(citedb[index].key, key) != 0); index++)
 +    {
 +        ;
 +    }
 +
 +    fprintf(fp, "\n++++ PLEASE READ AND CITE THE FOLLOWING REFERENCE ++++\n");
 +    if (index < NSTR)
 +    {
 +        /* Insert newlines */
 +        author = wrap_lines(citedb[index].author, LINE_WIDTH, 0, FALSE);
 +        title  = wrap_lines(citedb[index].title, LINE_WIDTH, 0, FALSE);
 +        fprintf(fp, "%s\n%s\n%s %d (%d) pp. %s\n",
 +                author, title, citedb[index].journal,
 +                citedb[index].volume, citedb[index].year,
 +                citedb[index].pages);
 +        sfree(author);
 +        sfree(title);
 +    }
 +    else
 +    {
 +        fprintf(fp, "Entry %s not found in citation database\n", key);
 +    }
 +    fprintf(fp, "-------- -------- --- Thank You --- -------- --------\n\n");
 +    fflush(fp);
 +}
 +
 +#ifdef GMX_GIT_VERSION_INFO
 +/* Version information generated at compile time. */
 +#include "gromacs/utility/gitversion.h"
 +#else
 +/* Fall back to statically defined version. */
 +static const char _gmx_ver_string[] = "VERSION " VERSION;
 +#endif
 +
 +const char *GromacsVersion()
 +{
 +    return _gmx_ver_string;
 +}
 +
 +void gmx_print_version_info_gpu(FILE *fp);
 +
 +void gmx_print_version_info(FILE *fp)
 +{
 +    fprintf(fp, "Gromacs version:    %s\n", _gmx_ver_string);
 +#ifdef GMX_GIT_VERSION_INFO
 +    fprintf(fp, "GIT SHA1 hash:      %s\n", _gmx_full_git_hash);
 +    /* Only print out the branch information if present.
 +     * The generating script checks whether the branch point actually
 +     * coincides with the hash reported above, and produces an empty string
 +     * in such cases. */
 +    if (_gmx_central_base_hash[0] != 0)
 +    {
 +        fprintf(fp, "Branched from:      %s\n", _gmx_central_base_hash);
 +    }
 +#endif
 +
 +#ifdef GMX_DOUBLE
 +    fprintf(fp, "Precision:          double\n");
 +#else
 +    fprintf(fp, "Precision:          single\n");
 +#endif
 +    fprintf(fp, "Memory model:       %lu bit\n", 8*sizeof(void *));
 +
 +#ifdef GMX_THREAD_MPI
 +    fprintf(fp, "MPI library:        thread_mpi\n");
 +#elif defined(GMX_MPI)
 +    fprintf(fp, "MPI library:        MPI\n");
 +#else
 +    fprintf(fp, "MPI library:        none\n");
 +#endif
 +#ifdef GMX_OPENMP
 +    fprintf(fp, "OpenMP support:     enabled\n");
 +#else
 +    fprintf(fp, "OpenMP support:     disabled\n");
 +#endif
 +#ifdef GMX_GPU
 +    fprintf(fp, "GPU support:        enabled\n");
 +#else
 +    fprintf(fp, "GPU support:        disabled\n");
 +#endif
 +    /* A preprocessor trick to avoid duplicating logic from vec.h */
 +#define gmx_stringify2(x) #x
 +#define gmx_stringify(x) gmx_stringify2(x)
 +    fprintf(fp, "invsqrt routine:    %s\n", gmx_stringify(gmx_invsqrt(x)));
 +    fprintf(fp, "CPU acceleration:   %s\n", GMX_CPU_ACCELERATION_STRING);
 +
 +    /* TODO: Would be nicer to wrap this in a gmx_fft_version() call, but
 +     * since that is currently in mdlib, can wait for master. */
 +#ifdef GMX_FFT_FFTPACK
 +    fprintf(fp, "FFT library:        fftpack (built-in)\n");
 +#elif defined(GMX_FFT_FFTW3) && defined(GMX_NATIVE_WINDOWS)
 +    fprintf(fp, "FFT library:        %s\n", "fftw3");
 +#elif defined(GMX_FFT_FFTW3) && defined(GMX_DOUBLE)
 +    fprintf(fp, "FFT library:        %s\n", fftw_version);
 +#elif defined(GMX_FFT_FFTW3)
 +    fprintf(fp, "FFT library:        %s\n", fftwf_version);
 +#elif defined(GMX_FFT_MKL)
 +    fprintf(fp, "FFT library:        MKL\n");
 +#else
 +    fprintf(fp, "FFT library:        unknown\n");
 +#endif
 +#ifdef GMX_LARGEFILES
 +    fprintf(fp, "Large file support: enabled\n");
 +#else
 +    fprintf(fp, "Large file support: disabled\n");
 +#endif
 +#ifdef HAVE_RDTSCP
 +    fprintf(fp, "RDTSCP usage:       enabled\n");
 +#else
 +    fprintf(fp, "RDTSCP usage:       disabled\n");
 +#endif
 +
 +    fprintf(fp, "Built on:           %s\n", BUILD_TIME);
 +    fprintf(fp, "Built by:           %s\n", BUILD_USER);
 +    fprintf(fp, "Build OS/arch:      %s\n", BUILD_HOST);
 +    fprintf(fp, "Build CPU vendor:   %s\n", BUILD_CPU_VENDOR);
 +    fprintf(fp, "Build CPU brand:    %s\n", BUILD_CPU_BRAND);
 +    fprintf(fp, "Build CPU family:   %d   Model: %d   Stepping: %d\n",
 +            BUILD_CPU_FAMILY, BUILD_CPU_MODEL, BUILD_CPU_STEPPING);
 +    /* TODO: The below strings can be quite long, so it would be nice to wrap
 +     * them. Can wait for later, as the master branch has ready code to do all
 +     * that. */
 +    fprintf(fp, "Build CPU features: %s\n", BUILD_CPU_FEATURES);
 +    fprintf(fp, "C compiler:         %s\n", BUILD_C_COMPILER);
 +    fprintf(fp, "C compiler flags:   %s\n", BUILD_CFLAGS);
 +    if (BUILD_CXX_COMPILER[0] != '\0')
 +    {
 +        fprintf(fp, "C++ compiler:       %s\n", BUILD_CXX_COMPILER);
 +        fprintf(fp, "C++ compiler flags: %s\n", BUILD_CXXFLAGS);
 +    }
 +#ifdef HAVE_LIBMKL
 +    /* MKL might be used for LAPACK/BLAS even if FFTs use FFTW, so keep it separate */
 +    fprintf(fp, "Linked with Intel MKL version %s.%s.%s.\n",
 +            __INTEL_MKL__, __INTEL_MKL_MINOR__, __INTEL_MKL_UPDATE__);
 +#endif
 +#ifdef GMX_GPU
 +    gmx_print_version_info_gpu(fp);
 +#endif
 +
 +}
index 6941558aa1a81fcfb5d3820f0015a9de431e6827,0000000000000000000000000000000000000000..9b17054dd9268bde48ee3d6ed3999f10e591998a
mode 100644,000000..100644
--- /dev/null
@@@ -1,783 -1,0 +1,790 @@@
-     real          rcoulomb, rvdw, factor_coul, factor_vdw, sh_invrc6;
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + *          GROningen MAchine for Chemical Simulations
 + *
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + *
 + * For more info, check our website at http://www.gromacs.org
 + *
 + * And Hey:
 + * GROningen Mixture of Alchemy and Childrens' Stories
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <math.h>
 +
 +#include "vec.h"
 +#include "typedefs.h"
 +#include "nonbonded.h"
 +#include "nb_kernel.h"
 +#include "nrnb.h"
 +/* Free-energy (perturbed A/B state) nonbonded kernel.
 + *
 + * Loops over every i-entry of nlist and its j-neighbours, evaluates the
 + * electrostatic and Van der Waals interaction for state A and state B on
 + * soft-core scaled distances (rC, rV), and combines the two states with the
 + * lambda factors LFC/LFV.
 + *
 + * nlist        neighbour list; fields read: nri, iinr, jindex, jjnr, shift,
 + *              gid, ielec, ivdw
 + * xx           coordinates, read as a flat array of 3 reals per atom
 + * ff           forces, updated only when GMX_NONBONDED_DO_FORCE is set in
 + *              kernel_data->flags
 + * fr           supplies cut-offs, interaction modifiers, soft-core
 + *              parameters, shift vectors and nbfp; fr->fshift receives the
 + *              shift forces (also only when forces are requested)
 + * mdatoms      supplies chargeA/chargeB and typeA/typeB per atom
 + * kernel_data  supplies lambda[], the combined table and flags; receives
 + *              energies in energygrp_elec/energygrp_vdw (indexed by gid[n])
 + *              and dV/dlambda in dvdl[efptCOUL] and dvdl[efptVDW]
 + * nrnb         flop accounting, updated through inc_nrnb()
 + */
 +void
 +gmx_nb_free_energy_kernel(t_nblist *                nlist,
 +                          rvec *                    xx,
 +                          rvec *                    ff,
 +                          t_forcerec *              fr,
 +                          t_mdatoms *               mdatoms,
 +                          nb_kernel_data_t *        kernel_data,
 +                          t_nrnb *                  nrnb)
 +{
 +
 +#define  STATE_A  0
 +#define  STATE_B  1
 +#define  NSTATES  2
 +    int           i, j, n, ii, is3, ii3, k, nj0, nj1, jnr, j3, ggid;
 +    real          shX, shY, shZ;
 +    real          Fscal, FscalC[NSTATES], FscalV[NSTATES], tx, ty, tz;
 +    real          Vcoul[NSTATES], Vvdw[NSTATES];
 +    real          rinv6, r, rt, rtC, rtV;
 +    real          iqA, iqB;
 +    real          qq[NSTATES], vctot, krsq;
 +    int           ntiA, ntiB, tj[NSTATES];
 +    real          Vvdw6, Vvdw12, vvtot;
 +    real          ix, iy, iz, fix, fiy, fiz;
 +    real          dx, dy, dz, rsq, rinv;
 +    real          c6[NSTATES], c12[NSTATES];
 +    real          LFC[NSTATES], LFV[NSTATES], DLF[NSTATES];
 +    double        dvdl_coul, dvdl_vdw;
 +    real          lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
 +    real          sigma6[NSTATES], alpha_vdw_eff, alpha_coul_eff, sigma2_def, sigma2_min;
 +    real          rp, rpm2, rC, rV, rinvC, rpinvC, rinvV, rpinvV;
 +    real          sigma2[NSTATES], sigma_pow[NSTATES], sigma_powm2[NSTATES], rs, rs2;
 +    int           do_coultab, do_vdwtab, do_tab, tab_elemsize;
 +    int           n0, n1C, n1V, nnn;
 +    real          Y, F, G, H, Fp, Geps, Heps2, epsC, eps2C, epsV, eps2V, VV, FF;
 +    int           icoul, ivdw;
 +    int           nri;
 +    int *         iinr;
 +    int *         jindex;
 +    int *         jjnr;
 +    int *         shift;
 +    int *         gid;
 +    int *         typeA;
 +    int *         typeB;
 +    int           ntype;
 +    real *        shiftvec;
 +    real          dvdl_part;
 +    real *        fshift;
 +    real          tabscale;
 +    real *        VFtab;
 +    real *        x;
 +    real *        f;
 +    real          facel, krf, crf;
 +    real *        chargeA;
 +    real *        chargeB;
 +    real          sigma6_min, sigma6_def, lam_power, sc_power, sc_r_power;
 +    real          alpha_coul, alpha_vdw, lambda_coul, lambda_vdw, ewc;
 +    real *        nbfp;
 +    real *        dvdl;
 +    real *        Vv;
 +    real *        Vc;
 +    gmx_bool      bDoForces;
-                     factor_coul    = (rC <= rcoulomb) ? 1.0 : 0.0;
-                     factor_vdw     = (rV <= rvdw)     ? 1.0 : 0.0;
++    real          rcoulomb, rvdw, sh_invrc6;
 +    gmx_bool      bExactElecCutoff, bExactVdwCutoff;
 +    real          rcutoff, rcutoff2, rswitch, d, d2, swV3, swV4, swV5, swF2, swF3, swF4, sw, dsw, rinvcorr;
 +
 +    x                   = xx[0]; /* flat view: atom a occupies x[3*a] .. x[3*a+2] */
 +    f                   = ff[0];
 +
 +    fshift              = fr->fshift[0];
 +    Vc                  = kernel_data->energygrp_elec;
 +    Vv                  = kernel_data->energygrp_vdw;
 +    tabscale            = kernel_data->table_elec_vdw->scale;
 +    VFtab               = kernel_data->table_elec_vdw->data;
 +
 +    nri                 = nlist->nri;
 +    iinr                = nlist->iinr;
 +    jindex              = nlist->jindex;
 +    jjnr                = nlist->jjnr;
 +    icoul               = nlist->ielec;
 +    ivdw                = nlist->ivdw;
 +    shift               = nlist->shift;
 +    gid                 = nlist->gid;
 +
 +    shiftvec            = fr->shift_vec[0];
 +    chargeA             = mdatoms->chargeA;
 +    chargeB             = mdatoms->chargeB;
 +    facel               = fr->epsfac;
 +    krf                 = fr->k_rf;
 +    crf                 = fr->c_rf;
 +    ewc                 = fr->ewaldcoeff;
 +    Vc                  = kernel_data->energygrp_elec; /* repeats the assignment made above */
 +    typeA               = mdatoms->typeA;
 +    typeB               = mdatoms->typeB;
 +    ntype               = fr->ntype;
 +    nbfp                = fr->nbfp;
 +    Vv                  = kernel_data->energygrp_vdw; /* repeats the assignment made above */
 +    tabscale            = kernel_data->table_elec_vdw->scale; /* repeats the assignment made above */
 +    VFtab               = kernel_data->table_elec_vdw->data; /* repeats the assignment made above */
 +    lambda_coul         = kernel_data->lambda[efptCOUL];
 +    lambda_vdw          = kernel_data->lambda[efptVDW];
 +    dvdl                = kernel_data->dvdl;
 +    alpha_coul          = fr->sc_alphacoul;
 +    alpha_vdw           = fr->sc_alphavdw;
 +    lam_power           = fr->sc_power;
 +    sc_r_power          = fr->sc_r_power;
 +    sigma6_def          = fr->sc_sigma6_def;
 +    sigma6_min          = fr->sc_sigma6_min;
 +    bDoForces           = kernel_data->flags & GMX_NONBONDED_DO_FORCE;
 +
 +    rcoulomb            = fr->rcoulomb;
 +    rvdw                = fr->rvdw;
 +    sh_invrc6           = fr->ic->sh_invrc6;
 +
 +    if (fr->coulomb_modifier == eintmodPOTSWITCH || fr->vdw_modifier == eintmodPOTSWITCH)
 +    {   /* a single set of switch coefficients is built; the Coulomb radii are used whenever the Coulomb modifier is POTSWITCH */
 +        rcutoff         = (fr->coulomb_modifier == eintmodPOTSWITCH) ? fr->rcoulomb : fr->rvdw;
 +        rcutoff2        = rcutoff*rcutoff;
 +        rswitch         = (fr->coulomb_modifier == eintmodPOTSWITCH) ? fr->rcoulomb_switch : fr->rvdw_switch;
 +        d               = rcutoff-rswitch;
 +        swV3            = -10.0/(d*d*d);
 +        swV4            =  15.0/(d*d*d*d);
 +        swV5            =  -6.0/(d*d*d*d*d);
 +        swF2            = -30.0/(d*d*d);
 +        swF3            =  60.0/(d*d*d*d);
 +        swF4            = -30.0/(d*d*d*d*d);
 +    }
 +    else
 +    {
 +        /* Stupid compilers don't realize these variables will not be used.
 +         * rcutoff, rcutoff2 and d are not assigned in this branch. */
 +        rswitch         = 0.0;
 +        swV3            = 0.0;
 +        swV4            = 0.0;
 +        swV5            = 0.0;
 +        swF2            = 0.0;
 +        swF3            = 0.0;
 +        swF4            = 0.0;
 +    }
 +
 +    bExactElecCutoff    = (fr->coulomb_modifier != eintmodNONE) || fr->eeltype == eelRF_ZERO;
 +    bExactVdwCutoff     = (fr->vdw_modifier != eintmodNONE);
 +
 +    /* fix compiler warnings */
 +    nj1   = 0;
 +    n1C   = n1V   = 0;
 +    epsC  = epsV  = 0;
 +    eps2C = eps2V = 0;
 +
 +    dvdl_coul  = 0;
 +    dvdl_vdw   = 0;
 +
 +    /* Lambda factor for state A, 1-lambda*/
 +    LFC[STATE_A] = 1.0 - lambda_coul;
 +    LFV[STATE_A] = 1.0 - lambda_vdw;
 +
 +    /* Lambda factor for state B, lambda*/
 +    LFC[STATE_B] = lambda_coul;
 +    LFV[STATE_B] = lambda_vdw;
 +
 +    /*derivative of the lambda factor for state A and B */
 +    DLF[STATE_A] = -1;
 +    DLF[STATE_B] = 1;
 +
 +    for (i = 0; i < NSTATES; i++)
 +    {
 +        lfac_coul[i]  = (lam_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
 +        dlfac_coul[i] = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFC[i]) : 1);
 +        lfac_vdw[i]   = (lam_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
 +        dlfac_vdw[i]  = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFV[i]) : 1);
 +    }
 +    /* precalculate sigma^2 as the cube root of sigma^6 */
 +    sigma2_def = pow(sigma6_def, 1.0/3.0);
 +    sigma2_min = pow(sigma6_min, 1.0/3.0);
 +
 +    /* Ewald (not PME) table is special (icoul==enbcoulFEWALD) */
 +
 +    do_coultab = (icoul == GMX_NBKERNEL_ELEC_CUBICSPLINETABLE);
 +    do_vdwtab  = (ivdw == GMX_NBKERNEL_VDW_CUBICSPLINETABLE);
 +
 +    do_tab = do_coultab || do_vdwtab;
 +
 +    /* we always use the combined table here */
 +    tab_elemsize = 12;
 +
 +    for (n = 0; (n < nri); n++)
 +    {   /* one iteration per i-entry of the neighbour list */
 +        is3              = 3*shift[n];
 +        shX              = shiftvec[is3];
 +        shY              = shiftvec[is3+1];
 +        shZ              = shiftvec[is3+2];
 +        nj0              = jindex[n];
 +        nj1              = jindex[n+1];
 +        ii               = iinr[n];
 +        ii3              = 3*ii;
 +        ix               = shX + x[ii3+0];
 +        iy               = shY + x[ii3+1];
 +        iz               = shZ + x[ii3+2];
 +        iqA              = facel*chargeA[ii];
 +        iqB              = facel*chargeB[ii];
 +        ntiA             = 2*ntype*typeA[ii]; /* nbfp row offset: two reals (c6, c12) per type pair */
 +        ntiB             = 2*ntype*typeB[ii];
 +        vctot            = 0;
 +        vvtot            = 0;
 +        fix              = 0;
 +        fiy              = 0;
 +        fiz              = 0;
 +
 +        for (k = nj0; (k < nj1); k++)
 +        {
 +            jnr              = jjnr[k];
 +            j3               = 3*jnr;
 +            dx               = ix - x[j3];
 +            dy               = iy - x[j3+1];
 +            dz               = iz - x[j3+2];
 +            rsq              = dx*dx+dy*dy+dz*dz;
 +            rinv             = gmx_invsqrt(rsq);
 +            r                = rsq*rinv;
 +            if (sc_r_power == 6.0)
 +            {
 +                rpm2             = rsq*rsq;  /* r4 */
 +                rp               = rpm2*rsq; /* r6 */
 +            }
 +            else if (sc_r_power == 48.0)
 +            {
 +                rp               = rsq*rsq*rsq; /* r6 */
 +                rp               = rp*rp;       /* r12 */
 +                rp               = rp*rp;       /* r24 */
 +                rp               = rp*rp;       /* r48 */
 +                rpm2             = rp/rsq;      /* r46 */
 +            }
 +            else
 +            {
 +                rp             = pow(r, sc_r_power);  /* not currently supported as input, but can handle it */
 +                rpm2           = rp/rsq;
 +            }
 +
 +            tj[STATE_A]      = ntiA+2*typeA[jnr];
 +            tj[STATE_B]      = ntiB+2*typeB[jnr];
 +            qq[STATE_A]      = iqA*chargeA[jnr];
 +            qq[STATE_B]      = iqB*chargeB[jnr];
 +
 +            for (i = 0; i < NSTATES; i++)
 +            {
 +
 +                c6[i]              = nbfp[tj[i]];
 +                c12[i]             = nbfp[tj[i]+1];
 +                if ((c6[i] > 0) && (c12[i] > 0))
 +                {
 +                    /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
 +                    sigma6[i]       = 0.5*c12[i]/c6[i];
 +                    sigma2[i]       = pow(sigma6[i], 1.0/3.0);
 +                    /* should be able to get rid of this ^^^ internal pow call eventually.  Will require agreement on
 +                       what data to store externally.  Can't be fixed without larger scale changes, so not 4.6 */
 +                    if (sigma6[i] < sigma6_min)   /* for disappearing coul and vdw with soft core at the same time */
 +                    {
 +                        sigma6[i] = sigma6_min;
 +                        sigma2[i] = sigma2_min;
 +                    }
 +                }
 +                else
 +                {
 +                    sigma6[i]       = sigma6_def;
 +                    sigma2[i]       = sigma2_def;
 +                }
 +                if (sc_r_power == 6.0)
 +                {
 +                    sigma_pow[i]    = sigma6[i];
 +                    sigma_powm2[i]  = sigma6[i]/sigma2[i];
 +                }
 +                else if (sc_r_power == 48.0)
 +                {
 +                    sigma_pow[i]    = sigma6[i]*sigma6[i];       /* sigma^12 */
 +                    sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
 +                    sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
 +                    sigma_powm2[i]  = sigma_pow[i]/sigma2[i];
 +                }
 +                else
 +                {    /* not really supported as input, but in here for testing the general case*/
 +                    sigma_pow[i]    = pow(sigma2[i], sc_r_power/2);
 +                    sigma_powm2[i]  = sigma_pow[i]/(sigma2[i]);
 +                }
 +            }
 +
 +            /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
 +            if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
 +            {
 +                alpha_vdw_eff    = 0;
 +                alpha_coul_eff   = 0;
 +            }
 +            else
 +            {
 +                alpha_vdw_eff    = alpha_vdw;
 +                alpha_coul_eff   = alpha_coul;
 +            }
 +
 +            for (i = 0; i < NSTATES; i++)
 +            {
 +                FscalC[i]    = 0;
 +                FscalV[i]    = 0;
 +                Vcoul[i]     = 0;
 +                Vvdw[i]      = 0;
 +
 +                /* Only spend time on A or B state if it is non-zero */
 +                if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
 +                {
 +
 +                    /* this section has to be inside the loop because of the dependence on sigma_pow */
 +                    rpinvC         = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
 +                    rinvC          = pow(rpinvC, 1.0/sc_r_power);
 +                    rC             = 1.0/rinvC;
 +
 +                    rpinvV         = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
 +                    rinvV          = pow(rpinvV, 1.0/sc_r_power);
 +                    rV             = 1.0/rinvV;
 +
-                     if (qq[i] != 0)
 +                    if (do_tab)
 +                    {
 +                        rtC        = rC*tabscale;
 +                        n0         = rtC; /* real-to-int conversion gives the table point index */
 +                        epsC       = rtC-n0;
 +                        eps2C      = epsC*epsC;
 +                        n1C        = tab_elemsize*n0;
 +
 +                        rtV        = rV*tabscale;
 +                        n0         = rtV;
 +                        epsV       = rtV-n0;
 +                        eps2V      = epsV*epsV;
 +                        n1V        = tab_elemsize*n0;
 +                    }
 +
-                         if (bExactElecCutoff)
-                         {
-                             FscalC[i]        = (rC < rcoulomb) ? FscalC[i] : 0.0;
-                             Vcoul[i]         = (rC < rcoulomb) ? Vcoul[i] : 0.0;
-                         }
++                    /* With Ewald and soft-core we should put the cut-off on r,
++                     * not on the soft-cored rC, as the real-space and
++                     * reciprocal space contributions should (almost) cancel.
++                     */
++                    if (qq[i] != 0 &&
++                        !(bExactElecCutoff &&
++                          ((icoul != GMX_NBKERNEL_ELEC_EWALD && rC >= rcoulomb) ||
++                           (icoul == GMX_NBKERNEL_ELEC_EWALD && r >= rcoulomb))))
 +                    {
 +                        switch (icoul)
 +                        {
 +                            case GMX_NBKERNEL_ELEC_COULOMB:
 +                            case GMX_NBKERNEL_ELEC_EWALD:
 +                                /* simple cutoff (yes, ewald is done all on direct space for free energy) */
 +                                Vcoul[i]   = qq[i]*rinvC;
 +                                FscalC[i]  = Vcoul[i]*rpinvC;
 +                                break;
 +
 +                            case GMX_NBKERNEL_ELEC_REACTIONFIELD:
 +                                /* reaction-field */
 +                                Vcoul[i]   = qq[i]*(rinvC+krf*rC*rC-crf);
 +                                FscalC[i]  = qq[i]*(rinvC*rpinvC-2.0*krf);
 +                                break;
 +
 +                            case GMX_NBKERNEL_ELEC_CUBICSPLINETABLE:
 +                                /* non-Ewald tabulated coulomb */
 +                                nnn        = n1C;
 +                                Y          = VFtab[nnn];
 +                                F          = VFtab[nnn+1];
 +                                Geps       = epsC*VFtab[nnn+2];
 +                                Heps2      = eps2C*VFtab[nnn+3];
 +                                Fp         = F+Geps+Heps2;
 +                                VV         = Y+epsC*Fp;
 +                                FF         = Fp+Geps+2.0*Heps2;
 +                                Vcoul[i]   = qq[i]*VV;
 +                                FscalC[i]  = -qq[i]*tabscale*FF*rC*rpinvC;
 +                                break;
 +
 +                            default:
 +                                FscalC[i]  = 0.0;
 +                                Vcoul[i]   = 0.0;
 +                                break;
 +                        }
 +
 +                        if (fr->coulomb_modifier == eintmodPOTSWITCH)
 +                        {
 +                            d                = rC-rswitch;
 +                            d                = (d > 0.0) ? d : 0.0;
 +                            d2               = d*d;
 +                            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
 +                            dsw              = d2*(swF2+d*(swF3+d*swF4));
 +
 +                            Vcoul[i]        *= sw;
 +                            FscalC[i]        = FscalC[i]*sw + Vcoul[i]*dsw; /* Vcoul[i] is already the switched value here */
 +                        }
-                     if ((c6[i] != 0) || (c12[i] != 0))
 +                    }
 +
-             if (icoul == GMX_NBKERNEL_ELEC_EWALD)
++                    if ((c6[i] != 0 || c12[i] != 0) &&
++                        !(bExactVdwCutoff && rV >= rvdw))
 +                    {
 +                        switch (ivdw)
 +                        {
 +                            case GMX_NBKERNEL_VDW_LENNARDJONES:
 +                                /* cutoff LJ */
 +                                if (sc_r_power == 6.0)
 +                                {
 +                                    rinv6            = rpinvV;
 +                                }
 +                                else
 +                                {
 +                                    rinv6            = pow(rinvV, 6.0);
 +                                }
 +                                Vvdw6            = c6[i]*rinv6;
 +                                Vvdw12           = c12[i]*rinv6*rinv6;
 +                                if (fr->vdw_modifier == eintmodPOTSHIFT)
 +                                {
 +                                    Vvdw[i]          = ( (Vvdw12-c12[i]*sh_invrc6*sh_invrc6)*(1.0/12.0)
 +                                                         -(Vvdw6-c6[i]*sh_invrc6)*(1.0/6.0));
 +                                }
 +                                else
 +                                {
 +                                    Vvdw[i]          = Vvdw12*(1.0/12.0)-Vvdw6*(1.0/6.0);
 +                                }
 +                                FscalV[i]        = (Vvdw12-Vvdw6)*rpinvV;
 +                                break;
 +
 +                            case GMX_NBKERNEL_VDW_BUCKINGHAM:
 +                                gmx_fatal(FARGS, "Buckingham free energy not supported.");
 +                                break;
 +
 +                            case GMX_NBKERNEL_VDW_CUBICSPLINETABLE:
 +                                /* Table LJ */
 +                                nnn = n1V+4;
 +                                /* dispersion */
 +                                Y          = VFtab[nnn];
 +                                F          = VFtab[nnn+1];
 +                                Geps       = epsV*VFtab[nnn+2];
 +                                Heps2      = eps2V*VFtab[nnn+3];
 +                                Fp         = F+Geps+Heps2;
 +                                VV         = Y+epsV*Fp;
 +                                FF         = Fp+Geps+2.0*Heps2;
 +                                Vvdw[i]   += c6[i]*VV;
 +                                FscalV[i] -= c6[i]*tabscale*FF*rV*rpinvV;
 +
 +                                /* repulsion */
 +                                Y          = VFtab[nnn+4];
 +                                F          = VFtab[nnn+5];
 +                                Geps       = epsV*VFtab[nnn+6];
 +                                Heps2      = eps2V*VFtab[nnn+7];
 +                                Fp         = F+Geps+Heps2;
 +                                VV         = Y+epsV*Fp;
 +                                FF         = Fp+Geps+2.0*Heps2;
 +                                Vvdw[i]   += c12[i]*VV;
 +                                FscalV[i] -= c12[i]*tabscale*FF*rV*rpinvV;
 +                                break;
 +
 +                            default:
 +                                Vvdw[i]    = 0.0;
 +                                FscalV[i]  = 0.0;
 +                                break;
 +                        }
 +
 +                        if (fr->vdw_modifier == eintmodPOTSWITCH)
 +                        {
 +                            d                = rV-rswitch;
 +                            d                = (d > 0.0) ? d : 0.0;
 +                            d2               = d*d;
 +                            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
 +                            dsw              = d2*(swF2+d*(swF3+d*swF4));
 +
 +                            Vvdw[i]         *= sw;
 +                            FscalV[i]        = FscalV[i]*sw + Vvdw[i]*dsw; /* Vvdw[i] is already the switched value here */
 +
 +                            FscalV[i]        = (rV < rvdw) ? FscalV[i] : 0.0; /* NOTE(review): appears redundant with the rV >= rvdw test guarding this block - confirm */
 +                            Vvdw[i]          = (rV < rvdw) ? Vvdw[i] : 0.0;
 +                        }
 +                    }
 +                }
 +            }
 +
 +            Fscal = 0;
 +
-                 if (r != 0)
++            if (icoul == GMX_NBKERNEL_ELEC_EWALD &&
++                !(bExactElecCutoff && r >= rcoulomb))
 +            {
 +                /* because we compute the softcore normally,
 +                   we have to remove the ewald short range portion. Done outside of
 +                   the states loop because this part doesn't depend on the scaled R */
 +
-                     FF    = 0;
++#ifdef GMX_DOUBLE
++                /* Relative accuracy at R_ERF_R_INACC of 3e-10 */
++#define         R_ERF_R_INACC 0.006
++#else
++                /* Relative accuracy at R_ERF_R_INACC of 2e-5 */
++#define         R_ERF_R_INACC 0.1
++#endif
++                if (ewc*r > R_ERF_R_INACC)
 +                {
 +                    VV    = gmx_erf(ewc*r)*rinv;
 +                    FF    = rinv*rinv*(VV - ewc*M_2_SQRTPI*exp(-ewc*ewc*rsq));
 +                }
 +                else
 +                {   /* small ewc*r: leading Taylor-series terms of the two expressions in the branch above */
 +                    VV    = ewc*M_2_SQRTPI;
++                    FF    = ewc*ewc*ewc*M_2_SQRTPI*(2.0/3.0 - 0.4*ewc*ewc*rsq);
 +                }
 +
 +                for (i = 0; i < NSTATES; i++)
 +                {
 +                    vctot      -= LFC[i]*qq[i]*VV;
 +                    Fscal      -= LFC[i]*qq[i]*FF;
 +                    dvdl_coul  -= (DLF[i]*qq[i])*VV;
 +                }
 +            }
 +
 +            /* Assemble A and B states */
 +            for (i = 0; i < NSTATES; i++)
 +            {
 +                vctot         += LFC[i]*Vcoul[i];
 +                vvtot         += LFV[i]*Vvdw[i];
 +
 +                Fscal         += LFC[i]*FscalC[i]*rpm2;
 +                Fscal         += LFV[i]*FscalV[i]*rpm2;
 +
 +                dvdl_coul     += Vcoul[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*FscalC[i]*sigma_pow[i];
 +                dvdl_vdw      += Vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*FscalV[i]*sigma_pow[i];
 +            }
 +
 +            if (bDoForces)
 +            {
 +                tx         = Fscal*dx;
 +                ty         = Fscal*dy;
 +                tz         = Fscal*dz;
 +                fix        = fix + tx;
 +                fiy        = fiy + ty;
 +                fiz        = fiz + tz;
 +                f[j3]      = f[j3]   - tx;
 +                f[j3+1]    = f[j3+1] - ty;
 +                f[j3+2]    = f[j3+2] - tz;
 +            }
 +        }
 +
 +        if (bDoForces)
 +        {
 +            f[ii3]         = f[ii3]        + fix;
 +            f[ii3+1]       = f[ii3+1]      + fiy;
 +            f[ii3+2]       = f[ii3+2]      + fiz;
 +            fshift[is3]    = fshift[is3]   + fix;
 +            fshift[is3+1]  = fshift[is3+1] + fiy;
 +            fshift[is3+2]  = fshift[is3+2] + fiz;
 +        }
 +        ggid               = gid[n];
 +        Vc[ggid]           = Vc[ggid] + vctot;
 +        Vv[ggid]           = Vv[ggid] + vvtot;
 +    }
 +
 +    dvdl[efptCOUL]     += dvdl_coul;
 +    dvdl[efptVDW]      += dvdl_vdw;
 +
 +    /* Estimate flops, average for free energy stuff:
 +     * 12  flops per outer iteration
 +     * 150 flops per inner iteration
 +     * After the loop above n equals nri, so jindex[n] is the end offset of
 +     * the last j-range.
 +     */
 +    inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri*12 + nlist->jindex[n]*150);
 +}
 +/* Evaluate the soft-core free-energy nonbonded interaction for a single pair.
 + *
 + * Both end states, A (index 0) and B (index 1), are evaluated from the
 + * tabulated potentials in vftab at soft-core-modified distances and then
 + * combined with the per-state weights LFC and LFV.
 + *
 + * r2                      squared pair distance
 + * sc_r_power              soft-core power p; 6.0 and 48.0 take explicit
 + *                         multiplication paths, anything else uses pow()
 + * alpha_coul, alpha_vdw   soft-core alphas; both are replaced by 0 when
 + *                         c12A and c12B are both positive
 + * tabscale                multiplies a distance to give a fractional table index
 + * vftab                   table with 12 reals per point: offsets 0-3 are read
 + *                         for electrostatics, 4-7 for dispersion, 8-11 for
 + *                         repulsion
 + * qqA, c6A, c12A          state A parameters
 + * qqB, c6B, c12B          state B parameters
 + * LFC, LFV                per-state weights for the Coulomb and Vdw terms
 + * DLF                     per-state factors multiplying velec/vvdw in dvdl
 + * lfac_coul, lfac_vdw     per-state factors inside the soft-core distance
 + * dlfac_coul, dlfac_vdw   per-state factors in the alpha-dependent dvdl term
 + * sigma6_def, sigma2_def  used when c6 or c12 of a state is not positive
 + * sigma6_min, sigma2_min  used when the computed sigma6 is below sigma6_min
 + * velectot, vvdwtot       outputs, overwritten with the weighted potentials
 + * dvdl                    contributions are added at efptCOUL and efptVDW
 + *
 + * Returns fscal: the weighted sum of the per-state force scalars times rpm2.
 + *
 + * No range check is done here on the table index or on r2 being non-zero.
 + */
 +real
 +nb_free_energy_evaluate_single(real r2, real sc_r_power, real alpha_coul, real alpha_vdw,
 +                               real tabscale, real *vftab,
 +                               real qqA, real c6A, real c12A, real qqB, real c6B, real c12B,
 +                               real LFC[2], real LFV[2], real DLF[2],
 +                               real lfac_coul[2], real lfac_vdw[2], real dlfac_coul[2], real dlfac_vdw[2],
 +                               real sigma6_def, real sigma6_min, real sigma2_def, real sigma2_min,
 +                               real *velectot, real *vvdwtot, real *dvdl)
 +{
 +    real       r, rp, rpm2, rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VV, FF, fscal;
 +    real       qq[2], c6[2], c12[2], sigma6[2], sigma2[2], sigma_pow[2], sigma_powm2[2];
 +    real       alpha_coul_eff, alpha_vdw_eff, dvdl_coul, dvdl_vdw;
 +    real       rpinv, r_coul, r_vdw, velecsum, vvdwsum;
 +    real       fscal_vdw[2], fscal_elec[2];
 +    real       velec[2], vvdw[2];
 +    int        i, ntab;
 +    /* Index 0 holds state A and index 1 state B in all two-element arrays.
 +     * NOTE(review): the local 'r' is declared above but never used here. */
 +    qq[0]    = qqA;
 +    qq[1]    = qqB;
 +    c6[0]    = c6A;
 +    c6[1]    = c6B;
 +    c12[0]   = c12A;
 +    c12[1]   = c12B;
 +    /* rp = r^sc_r_power and rpm2 = r^(sc_r_power-2), both built from r2 */
 +    if (sc_r_power == 6.0)
 +    {
 +        rpm2             = r2*r2;   /* r4 */
 +        rp               = rpm2*r2; /* r6 */
 +    }
 +    else if (sc_r_power == 48.0)
 +    {
 +        rp               = r2*r2*r2; /* r6 */
 +        rp               = rp*rp;    /* r12 */
 +        rp               = rp*rp;    /* r24 */
 +        rp               = rp*rp;    /* r48 */
 +        rpm2             = rp/r2;    /* r46 */
 +    }
 +    else
 +    {
 +        rp             = pow(r2, 0.5*sc_r_power);  /* not currently supported as input, but can handle it */
 +        rpm2           = rp/r2;
 +    }
 +
 +    /* Loop over state A(0) and B(1): pick sigma6/sigma2 for each state and
 +     * raise them to the soft-core power (sigma_pow).
 +     * NOTE(review): sigma_powm2 is filled in below but is not read again
 +     * anywhere in this function. */
 +    for (i = 0; i < 2; i++)
 +    {
 +        if ((c6[i] > 0) && (c12[i] > 0))
 +        {
 +            /* The c6 & c12 coefficients now contain the constants 6.0 and 12.0, respectively.
 +             * Correct for this by multiplying with (1/12.0)/(1/6.0)=6.0/12.0=0.5.
 +             */
 +            sigma6[i]       = 0.5*c12[i]/c6[i];
 +            sigma2[i]       = pow(0.5*c12[i]/c6[i], 1.0/3.0);
 +            /* should be able to get rid of this ^^^ internal pow call eventually.  Will require agreement on
 +               what data to store externally.  Can't be fixed without larger scale changes, so not 4.6 */
 +            if (sigma6[i] < sigma6_min)   /* for disappearing coul and vdw with soft core at the same time */
 +            {
 +                sigma6[i] = sigma6_min;
 +                sigma2[i] = sigma2_min;
 +            }
 +        }
 +        else
 +        {
 +            sigma6[i]       = sigma6_def;
 +            sigma2[i]       = sigma2_def;
 +        }
 +        if (sc_r_power == 6.0)
 +        {
 +            sigma_pow[i]    = sigma6[i];
 +            sigma_powm2[i]  = sigma6[i]/sigma2[i];
 +        }
 +        else if (sc_r_power == 48.0)
 +        {
 +            sigma_pow[i]    = sigma6[i]*sigma6[i];       /* sigma^12 */
 +            sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
 +            sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
 +            sigma_powm2[i]  = sigma_pow[i]/sigma2[i];
 +        }
 +        else
 +        {    /* not really supported as input, but in here for testing the general case*/
 +            sigma_pow[i]    = pow(sigma2[i], sc_r_power/2);
 +            sigma_powm2[i]  = sigma_pow[i]/(sigma2[i]);
 +        }
 +    }
 +
 +    /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
 +    if ((c12[0] > 0) && (c12[1] > 0))
 +    {
 +        alpha_vdw_eff    = 0;
 +        alpha_coul_eff   = 0;
 +    }
 +    else
 +    {
 +        alpha_vdw_eff    = alpha_vdw;
 +        alpha_coul_eff   = alpha_coul;
 +    }
 +
 +    /* Loop over A and B states again */
 +    for (i = 0; i < 2; i++)
 +    {
 +        fscal_elec[i] = 0;
 +        fscal_vdw[i]  = 0;
 +        velec[i]      = 0;
 +        vvdw[i]       = 0;
 +
 +        /* Only spend time on A or B state if it is non-zero */
 +        if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
 +        {
 +            /* Coulomb: r_coul = (alpha_coul_eff*lfac_coul*sigma_pow + rp)^(1/sc_r_power) */
 +            rpinv            = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
 +            r_coul           = pow(rpinv, -1.0/sc_r_power);
 +
 +            /* Electrostatics table lookup data */
 +            rtab             = r_coul*tabscale;
 +            ntab             = rtab;      /* conversion to int drops the fraction */
 +            eps              = rtab-ntab; /* fractional part of the table position */
 +            eps2             = eps*eps;
 +            ntab             = 12*ntab;   /* 12 reals are stored per table point */
 +            /* Electrostatics: VV = Y + F*eps + G*eps^2 + H*eps^3, FF = dVV/d(eps) */
 +            Y                = vftab[ntab];
 +            F                = vftab[ntab+1];
 +            Geps             = eps*vftab[ntab+2];
 +            Heps2            = eps2*vftab[ntab+3];
 +            Fp               = F+Geps+Heps2;
 +            VV               = Y+eps*Fp;
 +            FF               = Fp+Geps+2.0*Heps2;
 +            velec[i]         = qq[i]*VV;
 +            fscal_elec[i]    = -qq[i]*FF*r_coul*rpinv*tabscale;
 +
 +            /* Vdw: same construction as for Coulomb, using alpha_vdw_eff and lfac_vdw;
 +             * rpinv is overwritten with the Vdw value here */
 +            rpinv            = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
 +            r_vdw            = pow(rpinv, -1.0/sc_r_power);
 +            /* Vdw table lookup data */
 +            rtab             = r_vdw*tabscale;
 +            ntab             = rtab;
 +            eps              = rtab-ntab;
 +            eps2             = eps*eps;
 +            ntab             = 12*ntab;
 +            /* Dispersion */
 +            Y                = vftab[ntab+4];
 +            F                = vftab[ntab+5];
 +            Geps             = eps*vftab[ntab+6];
 +            Heps2            = eps2*vftab[ntab+7];
 +            Fp               = F+Geps+Heps2;
 +            VV               = Y+eps*Fp;
 +            FF               = Fp+Geps+2.0*Heps2;
 +            vvdw[i]          = c6[i]*VV;
 +            fscal_vdw[i]     = -c6[i]*FF;
 +
 +            /* Repulsion */
 +            Y                = vftab[ntab+8];
 +            F                = vftab[ntab+9];
 +            Geps             = eps*vftab[ntab+10];
 +            Heps2            = eps2*vftab[ntab+11];
 +            Fp               = F+Geps+Heps2;
 +            VV               = Y+eps*Fp;
 +            FF               = Fp+Geps+2.0*Heps2;
 +            vvdw[i]         += c12[i]*VV;
 +            fscal_vdw[i]    -= c12[i]*FF;
 +            fscal_vdw[i]    *= r_vdw*rpinv*tabscale; /* same form of scaling as fscal_elec, with the Vdw distance */
 +        }
 +    }
 +    /* Now we have velec[i], vvdw[i], and fscal[i] for both states */
 +    /* Assemble A and B states */
 +    velecsum  = 0;
 +    vvdwsum   = 0;
 +    dvdl_coul = 0;
 +    dvdl_vdw  = 0;
 +    fscal     = 0;
 +    for (i = 0; i < 2; i++)
 +    {
 +        velecsum      += LFC[i]*velec[i];
 +        vvdwsum       += LFV[i]*vvdw[i];
 +
 +        fscal         += (LFC[i]*fscal_elec[i]+LFV[i]*fscal_vdw[i])*rpm2;
 +        /* dvdl: potential times DLF, plus a term proportional to alpha_*_eff
 +         * (that second term is zero when soft-core was switched off above) */
 +        dvdl_coul     += velec[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*fscal_elec[i]*sigma_pow[i];
 +        dvdl_vdw      += vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*fscal_vdw[i]*sigma_pow[i];
 +    }
 +    /* dvdl is accumulated into; the two potentials below are overwritten */
 +    dvdl[efptCOUL]     += dvdl_coul;
 +    dvdl[efptVDW]      += dvdl_vdw;
 +
 +    *velectot           = velecsum;
 +    *vvdwtot            = vvdwsum;
 +
 +    return fscal;
 +}
index a58de0c174787be25ee248910ee4661a21449988,0000000000000000000000000000000000000000..ceb52b591ef4831c910b6d8faa0773d9c9cf2af0
mode 100644,000000..100644
--- /dev/null
@@@ -1,465 -1,0 +1,465 @@@
-     kernellist_avx_128_fma_double[] =
 +/*
 + * Note: this file was generated by the Gromacs avx_128_fma_double kernel generator.
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + * Copyright (c) 2001-2012, The GROMACS Development Team
 + *
 + * Gromacs is a library for molecular simulation and trajectory analysis,
 + * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
 + * a full list of developers and information, check out http://www.gromacs.org
 + *
 + * This program is free software; you can redistribute it and/or modify it under
 + * the terms of the GNU Lesser General Public License as published by the Free
 + * Software Foundation; either version 2 of the License, or (at your option) any
 + * later version.
 + *
 + * To help fund GROMACS development, we humbly ask that you cite
 + * the papers people have written on it - you can find them on the website.
 + */
 +#ifndef nb_kernel_avx_128_fma_double_h
 +#define nb_kernel_avx_128_fma_double_h
 +
 +#include "../nb_kernel.h"
 +/* Prototypes of the avx_128_fma_double kernels, each declared as nb_kernel_t.
 + * The kernellist_avx_128_fma_double[] table further down pairs each function
 + * with its name string and descriptive strings.
 + *
 + * Name pattern: nb_kernel_Elec<E>_Vdw<V>_Geom<G>_<VF|F>_avx_128_fma_double.
 + * Going by the descriptive strings in the table rows:
 + *   Elec/Vdw:  None, Ew = Ewald, LJ = LennardJones, CSTab = CubicSplineTable;
 + *              a trailing Sh = PotentialShift, Sw = PotentialSwitch modifier
 + *   Geom:      P1P1 = ParticleParticle, W3P1 = Water3Particle,
 + *              W3W3 = Water3Water3, W4P1 = Water4Particle, W4W4 = Water4Water4
 + *   Output:    VF = PotentialAndForce, F = Force
 + * NOTE(review): the Coul, GB, RF and RFCut tags presumably follow the same
 + * scheme - confirm against the corresponding table rows.
 + */
 +nb_kernel_t nb_kernel_ElecNone_VdwLJ_GeomP1P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecNone_VdwLJ_GeomP1P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecNone_VdwLJSh_GeomP1P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecNone_VdwLJSh_GeomP1P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomP1P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomP1P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW3P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW3P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW3W3_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW4P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW4P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW4W4_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW4W4_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomP1P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomP1P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW3P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW3P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW3W3_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW3W3_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW4P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW4P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW4W4_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW4W4_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomP1P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomP1P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW3P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW3P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW3W3_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW3W3_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW4P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW4P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW4W4_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW4W4_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomP1P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomP1P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW3P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW3P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW3W3_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW3W3_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW4P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW4P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW4W4_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW4W4_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomP1P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomP1P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW3P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW3P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW3W3_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW3W3_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecGB_VdwNone_GeomP1P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomP1P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomP1P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW3P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW3P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW3W3_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW3W3_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW4P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW4P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW4W4_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW4W4_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomP1P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomP1P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW3P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW3P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW3W3_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW3W3_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW4P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW4P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW4W4_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW4W4_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomP1P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomP1P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW3P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW3P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW3W3_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW4P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW4P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW4W4_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW4W4_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomP1P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomP1P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW3P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW3P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW3W3_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW3W3_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_avx_128_fma_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_avx_128_fma_double;
 +
 +
 +nb_kernel_info_t
-     kernellist_avx_128_fma_double_size = sizeof(kernellist_avx_128_fma_double)/sizeof(kernellist_avx_128_fma_double[0]);
++kernellist_avx_128_fma_double[] =
 +{
 +    { nb_kernel_ElecNone_VdwLJ_GeomP1P1_VF_avx_128_fma_double, "nb_kernel_ElecNone_VdwLJ_GeomP1P1_VF_avx_128_fma_double", "avx_128_fma_double", "None", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecNone_VdwLJ_GeomP1P1_F_avx_128_fma_double, "nb_kernel_ElecNone_VdwLJ_GeomP1P1_F_avx_128_fma_double", "avx_128_fma_double", "None", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecNone_VdwLJSh_GeomP1P1_VF_avx_128_fma_double, "nb_kernel_ElecNone_VdwLJSh_GeomP1P1_VF_avx_128_fma_double", "avx_128_fma_double", "None", "None", "LennardJones", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecNone_VdwLJSh_GeomP1P1_F_avx_128_fma_double, "nb_kernel_ElecNone_VdwLJSh_GeomP1P1_F_avx_128_fma_double", "avx_128_fma_double", "None", "None", "LennardJones", "PotentialShift", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_avx_128_fma_double, "nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_avx_128_fma_double", "avx_128_fma_double", "None", "None", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_avx_128_fma_double, "nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_avx_128_fma_double", "avx_128_fma_double", "None", "None", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_avx_128_fma_double, "nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_avx_128_fma_double", "avx_128_fma_double", "None", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_avx_128_fma_double, "nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_avx_128_fma_double", "avx_128_fma_double", "None", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomP1P1_VF_avx_128_fma_double, "nb_kernel_ElecEw_VdwLJ_GeomP1P1_VF_avx_128_fma_double", "avx_128_fma_double", "Ewald", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomP1P1_F_avx_128_fma_double, "nb_kernel_ElecEw_VdwLJ_GeomP1P1_F_avx_128_fma_double", "avx_128_fma_double", "Ewald", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW3P1_VF_avx_128_fma_double, "nb_kernel_ElecEw_VdwLJ_GeomW3P1_VF_avx_128_fma_double", "avx_128_fma_double", "Ewald", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW3P1_F_avx_128_fma_double, "nb_kernel_ElecEw_VdwLJ_GeomW3P1_F_avx_128_fma_double", "avx_128_fma_double", "Ewald", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_avx_128_fma_double, "nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_avx_128_fma_double", "avx_128_fma_double", "Ewald", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW3W3_F_avx_128_fma_double, "nb_kernel_ElecEw_VdwLJ_GeomW3W3_F_avx_128_fma_double", "avx_128_fma_double", "Ewald", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW4P1_VF_avx_128_fma_double, "nb_kernel_ElecEw_VdwLJ_GeomW4P1_VF_avx_128_fma_double", "avx_128_fma_double", "Ewald", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW4P1_F_avx_128_fma_double, "nb_kernel_ElecEw_VdwLJ_GeomW4P1_F_avx_128_fma_double", "avx_128_fma_double", "Ewald", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW4W4_VF_avx_128_fma_double, "nb_kernel_ElecEw_VdwLJ_GeomW4W4_VF_avx_128_fma_double", "avx_128_fma_double", "Ewald", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW4W4_F_avx_128_fma_double, "nb_kernel_ElecEw_VdwLJ_GeomW4W4_F_avx_128_fma_double", "avx_128_fma_double", "Ewald", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecEw_VdwNone_GeomP1P1_VF_avx_128_fma_double, "nb_kernel_ElecEw_VdwNone_GeomP1P1_VF_avx_128_fma_double", "avx_128_fma_double", "Ewald", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwNone_GeomP1P1_F_avx_128_fma_double, "nb_kernel_ElecEw_VdwNone_GeomP1P1_F_avx_128_fma_double", "avx_128_fma_double", "Ewald", "None", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW3P1_VF_avx_128_fma_double, "nb_kernel_ElecEw_VdwNone_GeomW3P1_VF_avx_128_fma_double", "avx_128_fma_double", "Ewald", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW3P1_F_avx_128_fma_double, "nb_kernel_ElecEw_VdwNone_GeomW3P1_F_avx_128_fma_double", "avx_128_fma_double", "Ewald", "None", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW3W3_VF_avx_128_fma_double, "nb_kernel_ElecEw_VdwNone_GeomW3W3_VF_avx_128_fma_double", "avx_128_fma_double", "Ewald", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW3W3_F_avx_128_fma_double, "nb_kernel_ElecEw_VdwNone_GeomW3W3_F_avx_128_fma_double", "avx_128_fma_double", "Ewald", "None", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW4P1_VF_avx_128_fma_double, "nb_kernel_ElecEw_VdwNone_GeomW4P1_VF_avx_128_fma_double", "avx_128_fma_double", "Ewald", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW4P1_F_avx_128_fma_double, "nb_kernel_ElecEw_VdwNone_GeomW4P1_F_avx_128_fma_double", "avx_128_fma_double", "Ewald", "None", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW4W4_VF_avx_128_fma_double, "nb_kernel_ElecEw_VdwNone_GeomW4W4_VF_avx_128_fma_double", "avx_128_fma_double", "Ewald", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW4W4_F_avx_128_fma_double, "nb_kernel_ElecEw_VdwNone_GeomW4W4_F_avx_128_fma_double", "avx_128_fma_double", "Ewald", "None", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomP1P1_VF_avx_128_fma_double, "nb_kernel_ElecEw_VdwCSTab_GeomP1P1_VF_avx_128_fma_double", "avx_128_fma_double", "Ewald", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomP1P1_F_avx_128_fma_double, "nb_kernel_ElecEw_VdwCSTab_GeomP1P1_F_avx_128_fma_double", "avx_128_fma_double", "Ewald", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW3P1_VF_avx_128_fma_double, "nb_kernel_ElecEw_VdwCSTab_GeomW3P1_VF_avx_128_fma_double", "avx_128_fma_double", "Ewald", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW3P1_F_avx_128_fma_double, "nb_kernel_ElecEw_VdwCSTab_GeomW3P1_F_avx_128_fma_double", "avx_128_fma_double", "Ewald", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW3W3_VF_avx_128_fma_double, "nb_kernel_ElecEw_VdwCSTab_GeomW3W3_VF_avx_128_fma_double", "avx_128_fma_double", "Ewald", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW3W3_F_avx_128_fma_double, "nb_kernel_ElecEw_VdwCSTab_GeomW3W3_F_avx_128_fma_double", "avx_128_fma_double", "Ewald", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW4P1_VF_avx_128_fma_double, "nb_kernel_ElecEw_VdwCSTab_GeomW4P1_VF_avx_128_fma_double", "avx_128_fma_double", "Ewald", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW4P1_F_avx_128_fma_double, "nb_kernel_ElecEw_VdwCSTab_GeomW4P1_F_avx_128_fma_double", "avx_128_fma_double", "Ewald", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW4W4_VF_avx_128_fma_double, "nb_kernel_ElecEw_VdwCSTab_GeomW4W4_VF_avx_128_fma_double", "avx_128_fma_double", "Ewald", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW4W4_F_avx_128_fma_double, "nb_kernel_ElecEw_VdwCSTab_GeomW4W4_F_avx_128_fma_double", "avx_128_fma_double", "Ewald", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_VF_avx_128_fma_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_VF_avx_128_fma_double", "avx_128_fma_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_F_avx_128_fma_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_F_avx_128_fma_double", "avx_128_fma_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_VF_avx_128_fma_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_VF_avx_128_fma_double", "avx_128_fma_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_F_avx_128_fma_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_F_avx_128_fma_double", "avx_128_fma_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_VF_avx_128_fma_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_VF_avx_128_fma_double", "avx_128_fma_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_F_avx_128_fma_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_F_avx_128_fma_double", "avx_128_fma_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_VF_avx_128_fma_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_VF_avx_128_fma_double", "avx_128_fma_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_F_avx_128_fma_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_F_avx_128_fma_double", "avx_128_fma_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_VF_avx_128_fma_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_VF_avx_128_fma_double", "avx_128_fma_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_F_avx_128_fma_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_F_avx_128_fma_double", "avx_128_fma_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomP1P1_VF_avx_128_fma_double, "nb_kernel_ElecEwSh_VdwNone_GeomP1P1_VF_avx_128_fma_double", "avx_128_fma_double", "Ewald", "PotentialShift", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomP1P1_F_avx_128_fma_double, "nb_kernel_ElecEwSh_VdwNone_GeomP1P1_F_avx_128_fma_double", "avx_128_fma_double", "Ewald", "PotentialShift", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW3P1_VF_avx_128_fma_double, "nb_kernel_ElecEwSh_VdwNone_GeomW3P1_VF_avx_128_fma_double", "avx_128_fma_double", "Ewald", "PotentialShift", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW3P1_F_avx_128_fma_double, "nb_kernel_ElecEwSh_VdwNone_GeomW3P1_F_avx_128_fma_double", "avx_128_fma_double", "Ewald", "PotentialShift", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW3W3_VF_avx_128_fma_double, "nb_kernel_ElecEwSh_VdwNone_GeomW3W3_VF_avx_128_fma_double", "avx_128_fma_double", "Ewald", "PotentialShift", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW3W3_F_avx_128_fma_double, "nb_kernel_ElecEwSh_VdwNone_GeomW3W3_F_avx_128_fma_double", "avx_128_fma_double", "Ewald", "PotentialShift", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW4P1_VF_avx_128_fma_double, "nb_kernel_ElecEwSh_VdwNone_GeomW4P1_VF_avx_128_fma_double", "avx_128_fma_double", "Ewald", "PotentialShift", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW4P1_F_avx_128_fma_double, "nb_kernel_ElecEwSh_VdwNone_GeomW4P1_F_avx_128_fma_double", "avx_128_fma_double", "Ewald", "PotentialShift", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW4W4_VF_avx_128_fma_double, "nb_kernel_ElecEwSh_VdwNone_GeomW4W4_VF_avx_128_fma_double", "avx_128_fma_double", "Ewald", "PotentialShift", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW4W4_F_avx_128_fma_double, "nb_kernel_ElecEwSh_VdwNone_GeomW4W4_F_avx_128_fma_double", "avx_128_fma_double", "Ewald", "PotentialShift", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_VF_avx_128_fma_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_VF_avx_128_fma_double", "avx_128_fma_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_F_avx_128_fma_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_F_avx_128_fma_double", "avx_128_fma_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_VF_avx_128_fma_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_VF_avx_128_fma_double", "avx_128_fma_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_F_avx_128_fma_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_F_avx_128_fma_double", "avx_128_fma_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_VF_avx_128_fma_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_VF_avx_128_fma_double", "avx_128_fma_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_F_avx_128_fma_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_F_avx_128_fma_double", "avx_128_fma_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_VF_avx_128_fma_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_VF_avx_128_fma_double", "avx_128_fma_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_F_avx_128_fma_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_F_avx_128_fma_double", "avx_128_fma_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_VF_avx_128_fma_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_VF_avx_128_fma_double", "avx_128_fma_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_F_avx_128_fma_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_F_avx_128_fma_double", "avx_128_fma_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomP1P1_VF_avx_128_fma_double, "nb_kernel_ElecEwSw_VdwNone_GeomP1P1_VF_avx_128_fma_double", "avx_128_fma_double", "Ewald", "PotentialSwitch", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomP1P1_F_avx_128_fma_double, "nb_kernel_ElecEwSw_VdwNone_GeomP1P1_F_avx_128_fma_double", "avx_128_fma_double", "Ewald", "PotentialSwitch", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW3P1_VF_avx_128_fma_double, "nb_kernel_ElecEwSw_VdwNone_GeomW3P1_VF_avx_128_fma_double", "avx_128_fma_double", "Ewald", "PotentialSwitch", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW3P1_F_avx_128_fma_double, "nb_kernel_ElecEwSw_VdwNone_GeomW3P1_F_avx_128_fma_double", "avx_128_fma_double", "Ewald", "PotentialSwitch", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW3W3_VF_avx_128_fma_double, "nb_kernel_ElecEwSw_VdwNone_GeomW3W3_VF_avx_128_fma_double", "avx_128_fma_double", "Ewald", "PotentialSwitch", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW3W3_F_avx_128_fma_double, "nb_kernel_ElecEwSw_VdwNone_GeomW3W3_F_avx_128_fma_double", "avx_128_fma_double", "Ewald", "PotentialSwitch", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_avx_128_fma_double, "nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_avx_128_fma_double", "avx_128_fma_double", "Ewald", "PotentialSwitch", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_avx_128_fma_double, "nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_avx_128_fma_double", "avx_128_fma_double", "Ewald", "PotentialSwitch", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_avx_128_fma_double, "nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_avx_128_fma_double", "avx_128_fma_double", "Ewald", "PotentialSwitch", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_avx_128_fma_double, "nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_avx_128_fma_double", "avx_128_fma_double", "Ewald", "PotentialSwitch", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_avx_128_fma_double, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_avx_128_fma_double, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_avx_128_fma_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_avx_128_fma_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_avx_128_fma_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_avx_128_fma_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_avx_128_fma_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_avx_128_fma_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_avx_128_fma_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_avx_128_fma_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_avx_128_fma_double, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_avx_128_fma_double, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_avx_128_fma_double, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_avx_128_fma_double, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_avx_128_fma_double, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_avx_128_fma_double, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_avx_128_fma_double, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_avx_128_fma_double, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_avx_128_fma_double, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_avx_128_fma_double, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_avx_128_fma_double, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_avx_128_fma_double, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_avx_128_fma_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_avx_128_fma_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_avx_128_fma_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_avx_128_fma_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_avx_128_fma_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_avx_128_fma_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_avx_128_fma_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_avx_128_fma_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_avx_128_fma_double", "avx_128_fma_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_avx_128_fma_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_avx_128_fma_double", "avx_128_fma_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_avx_128_fma_double, "nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_avx_128_fma_double", "avx_128_fma_double", "GeneralizedBorn", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_avx_128_fma_double, "nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_avx_128_fma_double", "avx_128_fma_double", "GeneralizedBorn", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_avx_128_fma_double, "nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_avx_128_fma_double", "avx_128_fma_double", "GeneralizedBorn", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecGB_VdwNone_GeomP1P1_F_avx_128_fma_double, "nb_kernel_ElecGB_VdwNone_GeomP1P1_F_avx_128_fma_double", "avx_128_fma_double", "GeneralizedBorn", "None", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_avx_128_fma_double, "nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_avx_128_fma_double", "avx_128_fma_double", "GeneralizedBorn", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_avx_128_fma_double, "nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_avx_128_fma_double", "avx_128_fma_double", "GeneralizedBorn", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_avx_128_fma_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_avx_128_fma_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_avx_128_fma_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_F_avx_128_fma_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_F_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_VF_avx_128_fma_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_VF_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_F_avx_128_fma_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_F_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_VF_avx_128_fma_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_VF_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_F_avx_128_fma_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_F_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_avx_128_fma_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_avx_128_fma_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_VF_avx_128_fma_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_VF_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_F_avx_128_fma_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_F_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_VF_avx_128_fma_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_VF_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_F_avx_128_fma_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_F_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_VF_avx_128_fma_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_VF_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_F_avx_128_fma_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_F_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_VF_avx_128_fma_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_VF_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_F_avx_128_fma_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_F_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_VF_avx_128_fma_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_VF_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_F_avx_128_fma_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_F_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomP1P1_VF_avx_128_fma_double, "nb_kernel_ElecRFCut_VdwNone_GeomP1P1_VF_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "ExactCutoff", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomP1P1_F_avx_128_fma_double, "nb_kernel_ElecRFCut_VdwNone_GeomP1P1_F_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "ExactCutoff", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW3P1_VF_avx_128_fma_double, "nb_kernel_ElecRFCut_VdwNone_GeomW3P1_VF_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "ExactCutoff", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW3P1_F_avx_128_fma_double, "nb_kernel_ElecRFCut_VdwNone_GeomW3P1_F_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "ExactCutoff", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW3W3_VF_avx_128_fma_double, "nb_kernel_ElecRFCut_VdwNone_GeomW3W3_VF_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "ExactCutoff", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW3W3_F_avx_128_fma_double, "nb_kernel_ElecRFCut_VdwNone_GeomW3W3_F_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "ExactCutoff", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW4P1_VF_avx_128_fma_double, "nb_kernel_ElecRFCut_VdwNone_GeomW4P1_VF_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "ExactCutoff", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW4P1_F_avx_128_fma_double, "nb_kernel_ElecRFCut_VdwNone_GeomW4P1_F_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "ExactCutoff", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW4W4_VF_avx_128_fma_double, "nb_kernel_ElecRFCut_VdwNone_GeomW4W4_VF_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "ExactCutoff", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW4W4_F_avx_128_fma_double, "nb_kernel_ElecRFCut_VdwNone_GeomW4W4_F_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "ExactCutoff", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_VF_avx_128_fma_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_VF_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_F_avx_128_fma_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_F_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_VF_avx_128_fma_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_VF_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_F_avx_128_fma_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_F_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_VF_avx_128_fma_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_VF_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_F_avx_128_fma_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_F_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_VF_avx_128_fma_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_VF_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_F_avx_128_fma_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_F_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_VF_avx_128_fma_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_VF_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_F_avx_128_fma_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_F_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomP1P1_VF_avx_128_fma_double, "nb_kernel_ElecRF_VdwLJ_GeomP1P1_VF_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomP1P1_F_avx_128_fma_double, "nb_kernel_ElecRF_VdwLJ_GeomP1P1_F_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW3P1_VF_avx_128_fma_double, "nb_kernel_ElecRF_VdwLJ_GeomW3P1_VF_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW3P1_F_avx_128_fma_double, "nb_kernel_ElecRF_VdwLJ_GeomW3P1_F_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW3W3_VF_avx_128_fma_double, "nb_kernel_ElecRF_VdwLJ_GeomW3W3_VF_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW3W3_F_avx_128_fma_double, "nb_kernel_ElecRF_VdwLJ_GeomW3W3_F_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW4P1_VF_avx_128_fma_double, "nb_kernel_ElecRF_VdwLJ_GeomW4P1_VF_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW4P1_F_avx_128_fma_double, "nb_kernel_ElecRF_VdwLJ_GeomW4P1_F_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW4W4_VF_avx_128_fma_double, "nb_kernel_ElecRF_VdwLJ_GeomW4W4_VF_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW4W4_F_avx_128_fma_double, "nb_kernel_ElecRF_VdwLJ_GeomW4W4_F_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecRF_VdwNone_GeomP1P1_VF_avx_128_fma_double, "nb_kernel_ElecRF_VdwNone_GeomP1P1_VF_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwNone_GeomP1P1_F_avx_128_fma_double, "nb_kernel_ElecRF_VdwNone_GeomP1P1_F_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "None", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW3P1_VF_avx_128_fma_double, "nb_kernel_ElecRF_VdwNone_GeomW3P1_VF_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW3P1_F_avx_128_fma_double, "nb_kernel_ElecRF_VdwNone_GeomW3P1_F_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "None", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_avx_128_fma_double, "nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW3W3_F_avx_128_fma_double, "nb_kernel_ElecRF_VdwNone_GeomW3W3_F_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "None", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW4P1_VF_avx_128_fma_double, "nb_kernel_ElecRF_VdwNone_GeomW4P1_VF_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW4P1_F_avx_128_fma_double, "nb_kernel_ElecRF_VdwNone_GeomW4P1_F_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "None", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW4W4_VF_avx_128_fma_double, "nb_kernel_ElecRF_VdwNone_GeomW4W4_VF_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW4W4_F_avx_128_fma_double, "nb_kernel_ElecRF_VdwNone_GeomW4W4_F_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "None", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomP1P1_VF_avx_128_fma_double, "nb_kernel_ElecRF_VdwCSTab_GeomP1P1_VF_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomP1P1_F_avx_128_fma_double, "nb_kernel_ElecRF_VdwCSTab_GeomP1P1_F_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW3P1_VF_avx_128_fma_double, "nb_kernel_ElecRF_VdwCSTab_GeomW3P1_VF_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW3P1_F_avx_128_fma_double, "nb_kernel_ElecRF_VdwCSTab_GeomW3P1_F_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW3W3_VF_avx_128_fma_double, "nb_kernel_ElecRF_VdwCSTab_GeomW3W3_VF_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW3W3_F_avx_128_fma_double, "nb_kernel_ElecRF_VdwCSTab_GeomW3W3_F_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_avx_128_fma_double, "nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_avx_128_fma_double, "nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_avx_128_fma_double, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_avx_128_fma_double, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_avx_128_fma_double", "avx_128_fma_double", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" }
 +};
 +
 +int
++kernellist_avx_128_fma_double_size = sizeof(kernellist_avx_128_fma_double)/sizeof(kernellist_avx_128_fma_double[0]);
 +
 +#endif
index dfc40d926119bbe94f8536f6f2e1b62b298ee48d,0000000000000000000000000000000000000000..20add7f0a6ac5240f3314a33351baba79600e682
mode 100644,000000..100644
--- /dev/null
@@@ -1,465 -1,0 +1,465 @@@
-     kernellist_avx_128_fma_single[] =
 +/*
 + * Note: this file was generated by the Gromacs avx_128_fma_single kernel generator.
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + * Copyright (c) 2001-2012, The GROMACS Development Team
 + *
 + * Gromacs is a library for molecular simulation and trajectory analysis,
 + * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
 + * a full list of developers and information, check out http://www.gromacs.org
 + *
 + * This program is free software; you can redistribute it and/or modify it under
 + * the terms of the GNU Lesser General Public License as published by the Free
 + * Software Foundation; either version 2 of the License, or (at your option) any
 + * later version.
 + *
 + * To help fund GROMACS development, we humbly ask that you cite
 + * the papers people have written on it - you can find them on the website.
 + */
 +#ifndef nb_kernel_avx_128_fma_single_h
 +#define nb_kernel_avx_128_fma_single_h
 +
 +#include "../nb_kernel.h"
 +
 +nb_kernel_t nb_kernel_ElecNone_VdwLJ_GeomP1P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecNone_VdwLJ_GeomP1P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecNone_VdwLJSh_GeomP1P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecNone_VdwLJSh_GeomP1P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomP1P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomP1P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW3P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW3P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW3W3_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW4P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW4P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW4W4_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW4W4_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomP1P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomP1P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW3P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW3P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW3W3_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW3W3_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW4P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW4P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW4W4_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW4W4_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomP1P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomP1P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW3P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW3P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW3W3_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW3W3_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW4P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW4P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW4W4_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW4W4_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomP1P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomP1P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW3P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW3P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW3W3_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW3W3_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW4P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW4P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW4W4_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW4W4_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomP1P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomP1P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW3P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW3P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW3W3_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW3W3_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecGB_VdwNone_GeomP1P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomP1P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomP1P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW3P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW3P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW3W3_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW3W3_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW4P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW4P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW4W4_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW4W4_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomP1P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomP1P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW3P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW3P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW3W3_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW3W3_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW4P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW4P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW4W4_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW4W4_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomP1P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomP1P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW3P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW3P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW3W3_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW4P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW4P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW4W4_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW4W4_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomP1P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomP1P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW3P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW3P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW3W3_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW3W3_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_avx_128_fma_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_avx_128_fma_single;
 +
 +
 +nb_kernel_info_t
-     kernellist_avx_128_fma_single_size = sizeof(kernellist_avx_128_fma_single)/sizeof(kernellist_avx_128_fma_single[0]);
++kernellist_avx_128_fma_single[] =
 +{
 +    { nb_kernel_ElecNone_VdwLJ_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecNone_VdwLJ_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "None", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecNone_VdwLJ_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecNone_VdwLJ_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "None", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecNone_VdwLJSh_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecNone_VdwLJSh_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "None", "None", "LennardJones", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecNone_VdwLJSh_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecNone_VdwLJSh_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "None", "None", "LennardJones", "PotentialShift", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "None", "None", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "None", "None", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "None", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "None", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecEw_VdwLJ_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "Ewald", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecEw_VdwLJ_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "Ewald", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW3P1_VF_avx_128_fma_single, "nb_kernel_ElecEw_VdwLJ_GeomW3P1_VF_avx_128_fma_single", "avx_128_fma_single", "Ewald", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW3P1_F_avx_128_fma_single, "nb_kernel_ElecEw_VdwLJ_GeomW3P1_F_avx_128_fma_single", "avx_128_fma_single", "Ewald", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_avx_128_fma_single, "nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_avx_128_fma_single", "avx_128_fma_single", "Ewald", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW3W3_F_avx_128_fma_single, "nb_kernel_ElecEw_VdwLJ_GeomW3W3_F_avx_128_fma_single", "avx_128_fma_single", "Ewald", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW4P1_VF_avx_128_fma_single, "nb_kernel_ElecEw_VdwLJ_GeomW4P1_VF_avx_128_fma_single", "avx_128_fma_single", "Ewald", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW4P1_F_avx_128_fma_single, "nb_kernel_ElecEw_VdwLJ_GeomW4P1_F_avx_128_fma_single", "avx_128_fma_single", "Ewald", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW4W4_VF_avx_128_fma_single, "nb_kernel_ElecEw_VdwLJ_GeomW4W4_VF_avx_128_fma_single", "avx_128_fma_single", "Ewald", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW4W4_F_avx_128_fma_single, "nb_kernel_ElecEw_VdwLJ_GeomW4W4_F_avx_128_fma_single", "avx_128_fma_single", "Ewald", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecEw_VdwNone_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecEw_VdwNone_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "Ewald", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwNone_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecEw_VdwNone_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "Ewald", "None", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW3P1_VF_avx_128_fma_single, "nb_kernel_ElecEw_VdwNone_GeomW3P1_VF_avx_128_fma_single", "avx_128_fma_single", "Ewald", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW3P1_F_avx_128_fma_single, "nb_kernel_ElecEw_VdwNone_GeomW3P1_F_avx_128_fma_single", "avx_128_fma_single", "Ewald", "None", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW3W3_VF_avx_128_fma_single, "nb_kernel_ElecEw_VdwNone_GeomW3W3_VF_avx_128_fma_single", "avx_128_fma_single", "Ewald", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW3W3_F_avx_128_fma_single, "nb_kernel_ElecEw_VdwNone_GeomW3W3_F_avx_128_fma_single", "avx_128_fma_single", "Ewald", "None", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW4P1_VF_avx_128_fma_single, "nb_kernel_ElecEw_VdwNone_GeomW4P1_VF_avx_128_fma_single", "avx_128_fma_single", "Ewald", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW4P1_F_avx_128_fma_single, "nb_kernel_ElecEw_VdwNone_GeomW4P1_F_avx_128_fma_single", "avx_128_fma_single", "Ewald", "None", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW4W4_VF_avx_128_fma_single, "nb_kernel_ElecEw_VdwNone_GeomW4W4_VF_avx_128_fma_single", "avx_128_fma_single", "Ewald", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW4W4_F_avx_128_fma_single, "nb_kernel_ElecEw_VdwNone_GeomW4W4_F_avx_128_fma_single", "avx_128_fma_single", "Ewald", "None", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecEw_VdwCSTab_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "Ewald", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecEw_VdwCSTab_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "Ewald", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW3P1_VF_avx_128_fma_single, "nb_kernel_ElecEw_VdwCSTab_GeomW3P1_VF_avx_128_fma_single", "avx_128_fma_single", "Ewald", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW3P1_F_avx_128_fma_single, "nb_kernel_ElecEw_VdwCSTab_GeomW3P1_F_avx_128_fma_single", "avx_128_fma_single", "Ewald", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW3W3_VF_avx_128_fma_single, "nb_kernel_ElecEw_VdwCSTab_GeomW3W3_VF_avx_128_fma_single", "avx_128_fma_single", "Ewald", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW3W3_F_avx_128_fma_single, "nb_kernel_ElecEw_VdwCSTab_GeomW3W3_F_avx_128_fma_single", "avx_128_fma_single", "Ewald", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW4P1_VF_avx_128_fma_single, "nb_kernel_ElecEw_VdwCSTab_GeomW4P1_VF_avx_128_fma_single", "avx_128_fma_single", "Ewald", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW4P1_F_avx_128_fma_single, "nb_kernel_ElecEw_VdwCSTab_GeomW4P1_F_avx_128_fma_single", "avx_128_fma_single", "Ewald", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW4W4_VF_avx_128_fma_single, "nb_kernel_ElecEw_VdwCSTab_GeomW4W4_VF_avx_128_fma_single", "avx_128_fma_single", "Ewald", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW4W4_F_avx_128_fma_single, "nb_kernel_ElecEw_VdwCSTab_GeomW4W4_F_avx_128_fma_single", "avx_128_fma_single", "Ewald", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_VF_avx_128_fma_single, "nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_VF_avx_128_fma_single", "avx_128_fma_single", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_F_avx_128_fma_single, "nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_F_avx_128_fma_single", "avx_128_fma_single", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_VF_avx_128_fma_single, "nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_VF_avx_128_fma_single", "avx_128_fma_single", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_F_avx_128_fma_single, "nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_F_avx_128_fma_single", "avx_128_fma_single", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_VF_avx_128_fma_single, "nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_VF_avx_128_fma_single", "avx_128_fma_single", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_F_avx_128_fma_single, "nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_F_avx_128_fma_single", "avx_128_fma_single", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_VF_avx_128_fma_single, "nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_VF_avx_128_fma_single", "avx_128_fma_single", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_F_avx_128_fma_single, "nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_F_avx_128_fma_single", "avx_128_fma_single", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecEwSh_VdwNone_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "Ewald", "PotentialShift", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecEwSh_VdwNone_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "Ewald", "PotentialShift", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW3P1_VF_avx_128_fma_single, "nb_kernel_ElecEwSh_VdwNone_GeomW3P1_VF_avx_128_fma_single", "avx_128_fma_single", "Ewald", "PotentialShift", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW3P1_F_avx_128_fma_single, "nb_kernel_ElecEwSh_VdwNone_GeomW3P1_F_avx_128_fma_single", "avx_128_fma_single", "Ewald", "PotentialShift", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW3W3_VF_avx_128_fma_single, "nb_kernel_ElecEwSh_VdwNone_GeomW3W3_VF_avx_128_fma_single", "avx_128_fma_single", "Ewald", "PotentialShift", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW3W3_F_avx_128_fma_single, "nb_kernel_ElecEwSh_VdwNone_GeomW3W3_F_avx_128_fma_single", "avx_128_fma_single", "Ewald", "PotentialShift", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW4P1_VF_avx_128_fma_single, "nb_kernel_ElecEwSh_VdwNone_GeomW4P1_VF_avx_128_fma_single", "avx_128_fma_single", "Ewald", "PotentialShift", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW4P1_F_avx_128_fma_single, "nb_kernel_ElecEwSh_VdwNone_GeomW4P1_F_avx_128_fma_single", "avx_128_fma_single", "Ewald", "PotentialShift", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW4W4_VF_avx_128_fma_single, "nb_kernel_ElecEwSh_VdwNone_GeomW4W4_VF_avx_128_fma_single", "avx_128_fma_single", "Ewald", "PotentialShift", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW4W4_F_avx_128_fma_single, "nb_kernel_ElecEwSh_VdwNone_GeomW4W4_F_avx_128_fma_single", "avx_128_fma_single", "Ewald", "PotentialShift", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_VF_avx_128_fma_single, "nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_VF_avx_128_fma_single", "avx_128_fma_single", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_F_avx_128_fma_single, "nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_F_avx_128_fma_single", "avx_128_fma_single", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_VF_avx_128_fma_single, "nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_VF_avx_128_fma_single", "avx_128_fma_single", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_F_avx_128_fma_single, "nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_F_avx_128_fma_single", "avx_128_fma_single", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_VF_avx_128_fma_single, "nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_VF_avx_128_fma_single", "avx_128_fma_single", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_F_avx_128_fma_single, "nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_F_avx_128_fma_single", "avx_128_fma_single", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_VF_avx_128_fma_single, "nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_VF_avx_128_fma_single", "avx_128_fma_single", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_F_avx_128_fma_single, "nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_F_avx_128_fma_single", "avx_128_fma_single", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecEwSw_VdwNone_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "Ewald", "PotentialSwitch", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecEwSw_VdwNone_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "Ewald", "PotentialSwitch", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW3P1_VF_avx_128_fma_single, "nb_kernel_ElecEwSw_VdwNone_GeomW3P1_VF_avx_128_fma_single", "avx_128_fma_single", "Ewald", "PotentialSwitch", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW3P1_F_avx_128_fma_single, "nb_kernel_ElecEwSw_VdwNone_GeomW3P1_F_avx_128_fma_single", "avx_128_fma_single", "Ewald", "PotentialSwitch", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW3W3_VF_avx_128_fma_single, "nb_kernel_ElecEwSw_VdwNone_GeomW3W3_VF_avx_128_fma_single", "avx_128_fma_single", "Ewald", "PotentialSwitch", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW3W3_F_avx_128_fma_single, "nb_kernel_ElecEwSw_VdwNone_GeomW3W3_F_avx_128_fma_single", "avx_128_fma_single", "Ewald", "PotentialSwitch", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_avx_128_fma_single, "nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_avx_128_fma_single", "avx_128_fma_single", "Ewald", "PotentialSwitch", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_avx_128_fma_single, "nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_avx_128_fma_single", "avx_128_fma_single", "Ewald", "PotentialSwitch", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_avx_128_fma_single, "nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_avx_128_fma_single", "avx_128_fma_single", "Ewald", "PotentialSwitch", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_avx_128_fma_single, "nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_avx_128_fma_single", "avx_128_fma_single", "Ewald", "PotentialSwitch", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_avx_128_fma_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_avx_128_fma_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_avx_128_fma_single", "avx_128_fma_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_avx_128_fma_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_avx_128_fma_single", "avx_128_fma_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "GeneralizedBorn", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "GeneralizedBorn", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "GeneralizedBorn", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecGB_VdwNone_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecGB_VdwNone_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "GeneralizedBorn", "None", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "GeneralizedBorn", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "GeneralizedBorn", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_avx_128_fma_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_F_avx_128_fma_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_F_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_VF_avx_128_fma_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_VF_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_F_avx_128_fma_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_F_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_VF_avx_128_fma_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_VF_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_F_avx_128_fma_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_F_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_avx_128_fma_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_avx_128_fma_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_VF_avx_128_fma_single, "nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_VF_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_F_avx_128_fma_single, "nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_F_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_VF_avx_128_fma_single, "nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_VF_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_F_avx_128_fma_single, "nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_F_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_VF_avx_128_fma_single, "nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_VF_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_F_avx_128_fma_single, "nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_F_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_VF_avx_128_fma_single, "nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_VF_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_F_avx_128_fma_single, "nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_F_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecRFCut_VdwNone_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "ExactCutoff", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecRFCut_VdwNone_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "ExactCutoff", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW3P1_VF_avx_128_fma_single, "nb_kernel_ElecRFCut_VdwNone_GeomW3P1_VF_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "ExactCutoff", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW3P1_F_avx_128_fma_single, "nb_kernel_ElecRFCut_VdwNone_GeomW3P1_F_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "ExactCutoff", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW3W3_VF_avx_128_fma_single, "nb_kernel_ElecRFCut_VdwNone_GeomW3W3_VF_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "ExactCutoff", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW3W3_F_avx_128_fma_single, "nb_kernel_ElecRFCut_VdwNone_GeomW3W3_F_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "ExactCutoff", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW4P1_VF_avx_128_fma_single, "nb_kernel_ElecRFCut_VdwNone_GeomW4P1_VF_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "ExactCutoff", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW4P1_F_avx_128_fma_single, "nb_kernel_ElecRFCut_VdwNone_GeomW4P1_F_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "ExactCutoff", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW4W4_VF_avx_128_fma_single, "nb_kernel_ElecRFCut_VdwNone_GeomW4W4_VF_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "ExactCutoff", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW4W4_F_avx_128_fma_single, "nb_kernel_ElecRFCut_VdwNone_GeomW4W4_F_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "ExactCutoff", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_VF_avx_128_fma_single, "nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_VF_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_F_avx_128_fma_single, "nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_F_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_VF_avx_128_fma_single, "nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_VF_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_F_avx_128_fma_single, "nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_F_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_VF_avx_128_fma_single, "nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_VF_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_F_avx_128_fma_single, "nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_F_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_VF_avx_128_fma_single, "nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_VF_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_F_avx_128_fma_single, "nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_F_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecRF_VdwLJ_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecRF_VdwLJ_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW3P1_VF_avx_128_fma_single, "nb_kernel_ElecRF_VdwLJ_GeomW3P1_VF_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW3P1_F_avx_128_fma_single, "nb_kernel_ElecRF_VdwLJ_GeomW3P1_F_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW3W3_VF_avx_128_fma_single, "nb_kernel_ElecRF_VdwLJ_GeomW3W3_VF_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW3W3_F_avx_128_fma_single, "nb_kernel_ElecRF_VdwLJ_GeomW3W3_F_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW4P1_VF_avx_128_fma_single, "nb_kernel_ElecRF_VdwLJ_GeomW4P1_VF_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW4P1_F_avx_128_fma_single, "nb_kernel_ElecRF_VdwLJ_GeomW4P1_F_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW4W4_VF_avx_128_fma_single, "nb_kernel_ElecRF_VdwLJ_GeomW4W4_VF_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW4W4_F_avx_128_fma_single, "nb_kernel_ElecRF_VdwLJ_GeomW4W4_F_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecRF_VdwNone_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecRF_VdwNone_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwNone_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecRF_VdwNone_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "None", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW3P1_VF_avx_128_fma_single, "nb_kernel_ElecRF_VdwNone_GeomW3P1_VF_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW3P1_F_avx_128_fma_single, "nb_kernel_ElecRF_VdwNone_GeomW3P1_F_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "None", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_avx_128_fma_single, "nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW3W3_F_avx_128_fma_single, "nb_kernel_ElecRF_VdwNone_GeomW3W3_F_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "None", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW4P1_VF_avx_128_fma_single, "nb_kernel_ElecRF_VdwNone_GeomW4P1_VF_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW4P1_F_avx_128_fma_single, "nb_kernel_ElecRF_VdwNone_GeomW4P1_F_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "None", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW4W4_VF_avx_128_fma_single, "nb_kernel_ElecRF_VdwNone_GeomW4W4_VF_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW4W4_F_avx_128_fma_single, "nb_kernel_ElecRF_VdwNone_GeomW4W4_F_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "None", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomP1P1_VF_avx_128_fma_single, "nb_kernel_ElecRF_VdwCSTab_GeomP1P1_VF_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomP1P1_F_avx_128_fma_single, "nb_kernel_ElecRF_VdwCSTab_GeomP1P1_F_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW3P1_VF_avx_128_fma_single, "nb_kernel_ElecRF_VdwCSTab_GeomW3P1_VF_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW3P1_F_avx_128_fma_single, "nb_kernel_ElecRF_VdwCSTab_GeomW3P1_F_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW3W3_VF_avx_128_fma_single, "nb_kernel_ElecRF_VdwCSTab_GeomW3W3_VF_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW3W3_F_avx_128_fma_single, "nb_kernel_ElecRF_VdwCSTab_GeomW3W3_F_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_avx_128_fma_single, "nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_avx_128_fma_single, "nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_avx_128_fma_single, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_avx_128_fma_single, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_avx_128_fma_single", "avx_128_fma_single", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" }
 +};
 +
 +int
++kernellist_avx_128_fma_single_size = sizeof(kernellist_avx_128_fma_single)/sizeof(kernellist_avx_128_fma_single[0]); /* number of entries in kernellist_avx_128_fma_single (total array size divided by the size of one element) */
 +
 +#endif
index 9fef448af5b975454c29c169fc4624a23d65f2fe,0000000000000000000000000000000000000000..a887b3b9e4cee08d3a965722309e2c52377a7fc9
mode 100644,000000..100644
--- /dev/null
@@@ -1,465 -1,0 +1,465 @@@
-     kernellist_avx_256_double[] =
 +/*
 + * Note: this file was generated by the Gromacs avx_256_double kernel generator.
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + * Copyright (c) 2001-2012, The GROMACS Development Team
 + *
 + * Gromacs is a library for molecular simulation and trajectory analysis,
 + * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
 + * a full list of developers and information, check out http://www.gromacs.org
 + *
 + * This program is free software; you can redistribute it and/or modify it under
 + * the terms of the GNU Lesser General Public License as published by the Free
 + * Software Foundation; either version 2 of the License, or (at your option) any
 + * later version.
 + *
 + * To help fund GROMACS development, we humbly ask that you cite
 + * the papers people have written on it - you can find them on the website.
 + */
 +#ifndef nb_kernel_avx_256_double_h
 +#define nb_kernel_avx_256_double_h
 +
 +#include "../nb_kernel.h"
 +/* Declarations of the generated avx_256_double kernels, named nb_kernel_Elec<elec>_Vdw<vdw>_Geom<geometry>_<VF|F>_avx_256_double; the kernellist_avx_256_double table below refers to them by these names. nb_kernel_t is not defined in this file (presumably in ../nb_kernel.h - confirm). */
 +nb_kernel_t nb_kernel_ElecNone_VdwLJ_GeomP1P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecNone_VdwLJ_GeomP1P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecNone_VdwLJSh_GeomP1P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecNone_VdwLJSh_GeomP1P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomP1P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomP1P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW3P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW3P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW3W3_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW4P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW4P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW4W4_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW4W4_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomP1P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomP1P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW3P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW3P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW3W3_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW3W3_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW4P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW4P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW4W4_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW4W4_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomP1P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomP1P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW3P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW3P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW3W3_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW3W3_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW4P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW4P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW4W4_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW4W4_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomP1P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomP1P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW3P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW3P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW3W3_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW3W3_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW4P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW4P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW4W4_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW4W4_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomP1P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomP1P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW3P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW3P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW3W3_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW3W3_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecGB_VdwNone_GeomP1P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomP1P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomP1P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW3P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW3P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW3W3_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW3W3_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW4P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW4P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW4W4_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW4W4_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomP1P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomP1P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW3P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW3P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW3W3_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW3W3_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW4P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW4P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW4W4_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW4W4_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomP1P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomP1P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW3P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW3P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW3W3_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW4P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW4P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW4W4_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW4W4_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomP1P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomP1P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW3P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW3P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW3W3_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW3W3_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_avx_256_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_avx_256_double;
 +
 +
 +nb_kernel_info_t
-     kernellist_avx_256_double_size = sizeof(kernellist_avx_256_double)/sizeof(kernellist_avx_256_double[0]);
++kernellist_avx_256_double[] =
 +{
 +    { nb_kernel_ElecNone_VdwLJ_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecNone_VdwLJ_GeomP1P1_VF_avx_256_double", "avx_256_double", "None", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecNone_VdwLJ_GeomP1P1_F_avx_256_double, "nb_kernel_ElecNone_VdwLJ_GeomP1P1_F_avx_256_double", "avx_256_double", "None", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecNone_VdwLJSh_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecNone_VdwLJSh_GeomP1P1_VF_avx_256_double", "avx_256_double", "None", "None", "LennardJones", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecNone_VdwLJSh_GeomP1P1_F_avx_256_double, "nb_kernel_ElecNone_VdwLJSh_GeomP1P1_F_avx_256_double", "avx_256_double", "None", "None", "LennardJones", "PotentialShift", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_avx_256_double", "avx_256_double", "None", "None", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_avx_256_double, "nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_avx_256_double", "avx_256_double", "None", "None", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_avx_256_double", "avx_256_double", "None", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_avx_256_double, "nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_avx_256_double", "avx_256_double", "None", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecEw_VdwLJ_GeomP1P1_VF_avx_256_double", "avx_256_double", "Ewald", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomP1P1_F_avx_256_double, "nb_kernel_ElecEw_VdwLJ_GeomP1P1_F_avx_256_double", "avx_256_double", "Ewald", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW3P1_VF_avx_256_double, "nb_kernel_ElecEw_VdwLJ_GeomW3P1_VF_avx_256_double", "avx_256_double", "Ewald", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW3P1_F_avx_256_double, "nb_kernel_ElecEw_VdwLJ_GeomW3P1_F_avx_256_double", "avx_256_double", "Ewald", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_avx_256_double, "nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_avx_256_double", "avx_256_double", "Ewald", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW3W3_F_avx_256_double, "nb_kernel_ElecEw_VdwLJ_GeomW3W3_F_avx_256_double", "avx_256_double", "Ewald", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW4P1_VF_avx_256_double, "nb_kernel_ElecEw_VdwLJ_GeomW4P1_VF_avx_256_double", "avx_256_double", "Ewald", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW4P1_F_avx_256_double, "nb_kernel_ElecEw_VdwLJ_GeomW4P1_F_avx_256_double", "avx_256_double", "Ewald", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW4W4_VF_avx_256_double, "nb_kernel_ElecEw_VdwLJ_GeomW4W4_VF_avx_256_double", "avx_256_double", "Ewald", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW4W4_F_avx_256_double, "nb_kernel_ElecEw_VdwLJ_GeomW4W4_F_avx_256_double", "avx_256_double", "Ewald", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecEw_VdwNone_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecEw_VdwNone_GeomP1P1_VF_avx_256_double", "avx_256_double", "Ewald", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwNone_GeomP1P1_F_avx_256_double, "nb_kernel_ElecEw_VdwNone_GeomP1P1_F_avx_256_double", "avx_256_double", "Ewald", "None", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW3P1_VF_avx_256_double, "nb_kernel_ElecEw_VdwNone_GeomW3P1_VF_avx_256_double", "avx_256_double", "Ewald", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW3P1_F_avx_256_double, "nb_kernel_ElecEw_VdwNone_GeomW3P1_F_avx_256_double", "avx_256_double", "Ewald", "None", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW3W3_VF_avx_256_double, "nb_kernel_ElecEw_VdwNone_GeomW3W3_VF_avx_256_double", "avx_256_double", "Ewald", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW3W3_F_avx_256_double, "nb_kernel_ElecEw_VdwNone_GeomW3W3_F_avx_256_double", "avx_256_double", "Ewald", "None", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW4P1_VF_avx_256_double, "nb_kernel_ElecEw_VdwNone_GeomW4P1_VF_avx_256_double", "avx_256_double", "Ewald", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW4P1_F_avx_256_double, "nb_kernel_ElecEw_VdwNone_GeomW4P1_F_avx_256_double", "avx_256_double", "Ewald", "None", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW4W4_VF_avx_256_double, "nb_kernel_ElecEw_VdwNone_GeomW4W4_VF_avx_256_double", "avx_256_double", "Ewald", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW4W4_F_avx_256_double, "nb_kernel_ElecEw_VdwNone_GeomW4W4_F_avx_256_double", "avx_256_double", "Ewald", "None", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecEw_VdwCSTab_GeomP1P1_VF_avx_256_double", "avx_256_double", "Ewald", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomP1P1_F_avx_256_double, "nb_kernel_ElecEw_VdwCSTab_GeomP1P1_F_avx_256_double", "avx_256_double", "Ewald", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW3P1_VF_avx_256_double, "nb_kernel_ElecEw_VdwCSTab_GeomW3P1_VF_avx_256_double", "avx_256_double", "Ewald", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW3P1_F_avx_256_double, "nb_kernel_ElecEw_VdwCSTab_GeomW3P1_F_avx_256_double", "avx_256_double", "Ewald", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW3W3_VF_avx_256_double, "nb_kernel_ElecEw_VdwCSTab_GeomW3W3_VF_avx_256_double", "avx_256_double", "Ewald", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW3W3_F_avx_256_double, "nb_kernel_ElecEw_VdwCSTab_GeomW3W3_F_avx_256_double", "avx_256_double", "Ewald", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW4P1_VF_avx_256_double, "nb_kernel_ElecEw_VdwCSTab_GeomW4P1_VF_avx_256_double", "avx_256_double", "Ewald", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW4P1_F_avx_256_double, "nb_kernel_ElecEw_VdwCSTab_GeomW4P1_F_avx_256_double", "avx_256_double", "Ewald", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW4W4_VF_avx_256_double, "nb_kernel_ElecEw_VdwCSTab_GeomW4W4_VF_avx_256_double", "avx_256_double", "Ewald", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW4W4_F_avx_256_double, "nb_kernel_ElecEw_VdwCSTab_GeomW4W4_F_avx_256_double", "avx_256_double", "Ewald", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_VF_avx_256_double", "avx_256_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_F_avx_256_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_F_avx_256_double", "avx_256_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_VF_avx_256_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_VF_avx_256_double", "avx_256_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_F_avx_256_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_F_avx_256_double", "avx_256_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_VF_avx_256_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_VF_avx_256_double", "avx_256_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_F_avx_256_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_F_avx_256_double", "avx_256_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_VF_avx_256_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_VF_avx_256_double", "avx_256_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_F_avx_256_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_F_avx_256_double", "avx_256_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_VF_avx_256_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_VF_avx_256_double", "avx_256_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_F_avx_256_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_F_avx_256_double", "avx_256_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecEwSh_VdwNone_GeomP1P1_VF_avx_256_double", "avx_256_double", "Ewald", "PotentialShift", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomP1P1_F_avx_256_double, "nb_kernel_ElecEwSh_VdwNone_GeomP1P1_F_avx_256_double", "avx_256_double", "Ewald", "PotentialShift", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW3P1_VF_avx_256_double, "nb_kernel_ElecEwSh_VdwNone_GeomW3P1_VF_avx_256_double", "avx_256_double", "Ewald", "PotentialShift", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW3P1_F_avx_256_double, "nb_kernel_ElecEwSh_VdwNone_GeomW3P1_F_avx_256_double", "avx_256_double", "Ewald", "PotentialShift", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW3W3_VF_avx_256_double, "nb_kernel_ElecEwSh_VdwNone_GeomW3W3_VF_avx_256_double", "avx_256_double", "Ewald", "PotentialShift", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW3W3_F_avx_256_double, "nb_kernel_ElecEwSh_VdwNone_GeomW3W3_F_avx_256_double", "avx_256_double", "Ewald", "PotentialShift", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW4P1_VF_avx_256_double, "nb_kernel_ElecEwSh_VdwNone_GeomW4P1_VF_avx_256_double", "avx_256_double", "Ewald", "PotentialShift", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW4P1_F_avx_256_double, "nb_kernel_ElecEwSh_VdwNone_GeomW4P1_F_avx_256_double", "avx_256_double", "Ewald", "PotentialShift", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW4W4_VF_avx_256_double, "nb_kernel_ElecEwSh_VdwNone_GeomW4W4_VF_avx_256_double", "avx_256_double", "Ewald", "PotentialShift", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW4W4_F_avx_256_double, "nb_kernel_ElecEwSh_VdwNone_GeomW4W4_F_avx_256_double", "avx_256_double", "Ewald", "PotentialShift", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_VF_avx_256_double", "avx_256_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_F_avx_256_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_F_avx_256_double", "avx_256_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_VF_avx_256_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_VF_avx_256_double", "avx_256_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_F_avx_256_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_F_avx_256_double", "avx_256_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_VF_avx_256_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_VF_avx_256_double", "avx_256_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_F_avx_256_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_F_avx_256_double", "avx_256_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_VF_avx_256_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_VF_avx_256_double", "avx_256_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_F_avx_256_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_F_avx_256_double", "avx_256_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_VF_avx_256_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_VF_avx_256_double", "avx_256_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_F_avx_256_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_F_avx_256_double", "avx_256_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecEwSw_VdwNone_GeomP1P1_VF_avx_256_double", "avx_256_double", "Ewald", "PotentialSwitch", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomP1P1_F_avx_256_double, "nb_kernel_ElecEwSw_VdwNone_GeomP1P1_F_avx_256_double", "avx_256_double", "Ewald", "PotentialSwitch", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW3P1_VF_avx_256_double, "nb_kernel_ElecEwSw_VdwNone_GeomW3P1_VF_avx_256_double", "avx_256_double", "Ewald", "PotentialSwitch", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW3P1_F_avx_256_double, "nb_kernel_ElecEwSw_VdwNone_GeomW3P1_F_avx_256_double", "avx_256_double", "Ewald", "PotentialSwitch", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW3W3_VF_avx_256_double, "nb_kernel_ElecEwSw_VdwNone_GeomW3W3_VF_avx_256_double", "avx_256_double", "Ewald", "PotentialSwitch", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW3W3_F_avx_256_double, "nb_kernel_ElecEwSw_VdwNone_GeomW3W3_F_avx_256_double", "avx_256_double", "Ewald", "PotentialSwitch", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_avx_256_double, "nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_avx_256_double", "avx_256_double", "Ewald", "PotentialSwitch", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_avx_256_double, "nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_avx_256_double", "avx_256_double", "Ewald", "PotentialSwitch", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_avx_256_double, "nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_avx_256_double", "avx_256_double", "Ewald", "PotentialSwitch", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_avx_256_double, "nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_avx_256_double", "avx_256_double", "Ewald", "PotentialSwitch", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_avx_256_double, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_avx_256_double", "avx_256_double", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_avx_256_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_avx_256_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_avx_256_double", "avx_256_double", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_avx_256_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_avx_256_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_avx_256_double", "avx_256_double", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_avx_256_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_avx_256_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_avx_256_double", "avx_256_double", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_avx_256_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_avx_256_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_avx_256_double", "avx_256_double", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_avx_256_double, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_avx_256_double", "avx_256_double", "Coulomb", "None", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_avx_256_double, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_avx_256_double, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_avx_256_double", "avx_256_double", "Coulomb", "None", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_avx_256_double, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_avx_256_double, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_avx_256_double", "avx_256_double", "Coulomb", "None", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_avx_256_double, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_avx_256_double, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_avx_256_double", "avx_256_double", "Coulomb", "None", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_avx_256_double, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_avx_256_double, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_avx_256_double", "avx_256_double", "Coulomb", "None", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_avx_256_double, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_avx_256_double", "avx_256_double", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_avx_256_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_avx_256_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_avx_256_double", "avx_256_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_avx_256_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_avx_256_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_avx_256_double", "avx_256_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_avx_256_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_avx_256_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_avx_256_double", "avx_256_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_avx_256_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_avx_256_double", "avx_256_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_avx_256_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_avx_256_double", "avx_256_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_avx_256_double, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_avx_256_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_avx_256_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_avx_256_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_avx_256_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_avx_256_double, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_avx_256_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_avx_256_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_avx_256_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_avx_256_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_avx_256_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_avx_256_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_avx_256_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_avx_256_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_avx_256_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_avx_256_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_avx_256_double", "avx_256_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_avx_256_double", "avx_256_double", "GeneralizedBorn", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_avx_256_double, "nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_avx_256_double", "avx_256_double", "GeneralizedBorn", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_avx_256_double", "avx_256_double", "GeneralizedBorn", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecGB_VdwNone_GeomP1P1_F_avx_256_double, "nb_kernel_ElecGB_VdwNone_GeomP1P1_F_avx_256_double", "avx_256_double", "GeneralizedBorn", "None", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_avx_256_double", "avx_256_double", "GeneralizedBorn", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_avx_256_double, "nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_avx_256_double", "avx_256_double", "GeneralizedBorn", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_avx_256_double", "avx_256_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_avx_256_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_avx_256_double", "avx_256_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_avx_256_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_avx_256_double", "avx_256_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_F_avx_256_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_F_avx_256_double", "avx_256_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_VF_avx_256_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_VF_avx_256_double", "avx_256_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_F_avx_256_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_F_avx_256_double", "avx_256_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_VF_avx_256_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_VF_avx_256_double", "avx_256_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_F_avx_256_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_F_avx_256_double", "avx_256_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_avx_256_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_avx_256_double", "avx_256_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_avx_256_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_avx_256_double", "avx_256_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_VF_avx_256_double", "avx_256_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_F_avx_256_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_F_avx_256_double", "avx_256_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_VF_avx_256_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_VF_avx_256_double", "avx_256_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_F_avx_256_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_F_avx_256_double", "avx_256_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_VF_avx_256_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_VF_avx_256_double", "avx_256_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_F_avx_256_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_F_avx_256_double", "avx_256_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_VF_avx_256_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_VF_avx_256_double", "avx_256_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_F_avx_256_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_F_avx_256_double", "avx_256_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_VF_avx_256_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_VF_avx_256_double", "avx_256_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_F_avx_256_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_F_avx_256_double", "avx_256_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecRFCut_VdwNone_GeomP1P1_VF_avx_256_double", "avx_256_double", "ReactionField", "ExactCutoff", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomP1P1_F_avx_256_double, "nb_kernel_ElecRFCut_VdwNone_GeomP1P1_F_avx_256_double", "avx_256_double", "ReactionField", "ExactCutoff", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW3P1_VF_avx_256_double, "nb_kernel_ElecRFCut_VdwNone_GeomW3P1_VF_avx_256_double", "avx_256_double", "ReactionField", "ExactCutoff", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW3P1_F_avx_256_double, "nb_kernel_ElecRFCut_VdwNone_GeomW3P1_F_avx_256_double", "avx_256_double", "ReactionField", "ExactCutoff", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW3W3_VF_avx_256_double, "nb_kernel_ElecRFCut_VdwNone_GeomW3W3_VF_avx_256_double", "avx_256_double", "ReactionField", "ExactCutoff", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW3W3_F_avx_256_double, "nb_kernel_ElecRFCut_VdwNone_GeomW3W3_F_avx_256_double", "avx_256_double", "ReactionField", "ExactCutoff", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW4P1_VF_avx_256_double, "nb_kernel_ElecRFCut_VdwNone_GeomW4P1_VF_avx_256_double", "avx_256_double", "ReactionField", "ExactCutoff", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW4P1_F_avx_256_double, "nb_kernel_ElecRFCut_VdwNone_GeomW4P1_F_avx_256_double", "avx_256_double", "ReactionField", "ExactCutoff", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW4W4_VF_avx_256_double, "nb_kernel_ElecRFCut_VdwNone_GeomW4W4_VF_avx_256_double", "avx_256_double", "ReactionField", "ExactCutoff", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW4W4_F_avx_256_double, "nb_kernel_ElecRFCut_VdwNone_GeomW4W4_F_avx_256_double", "avx_256_double", "ReactionField", "ExactCutoff", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_VF_avx_256_double", "avx_256_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_F_avx_256_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_F_avx_256_double", "avx_256_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_VF_avx_256_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_VF_avx_256_double", "avx_256_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_F_avx_256_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_F_avx_256_double", "avx_256_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_VF_avx_256_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_VF_avx_256_double", "avx_256_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_F_avx_256_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_F_avx_256_double", "avx_256_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_VF_avx_256_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_VF_avx_256_double", "avx_256_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_F_avx_256_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_F_avx_256_double", "avx_256_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_VF_avx_256_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_VF_avx_256_double", "avx_256_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_F_avx_256_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_F_avx_256_double", "avx_256_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecRF_VdwLJ_GeomP1P1_VF_avx_256_double", "avx_256_double", "ReactionField", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomP1P1_F_avx_256_double, "nb_kernel_ElecRF_VdwLJ_GeomP1P1_F_avx_256_double", "avx_256_double", "ReactionField", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW3P1_VF_avx_256_double, "nb_kernel_ElecRF_VdwLJ_GeomW3P1_VF_avx_256_double", "avx_256_double", "ReactionField", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW3P1_F_avx_256_double, "nb_kernel_ElecRF_VdwLJ_GeomW3P1_F_avx_256_double", "avx_256_double", "ReactionField", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW3W3_VF_avx_256_double, "nb_kernel_ElecRF_VdwLJ_GeomW3W3_VF_avx_256_double", "avx_256_double", "ReactionField", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW3W3_F_avx_256_double, "nb_kernel_ElecRF_VdwLJ_GeomW3W3_F_avx_256_double", "avx_256_double", "ReactionField", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW4P1_VF_avx_256_double, "nb_kernel_ElecRF_VdwLJ_GeomW4P1_VF_avx_256_double", "avx_256_double", "ReactionField", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW4P1_F_avx_256_double, "nb_kernel_ElecRF_VdwLJ_GeomW4P1_F_avx_256_double", "avx_256_double", "ReactionField", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW4W4_VF_avx_256_double, "nb_kernel_ElecRF_VdwLJ_GeomW4W4_VF_avx_256_double", "avx_256_double", "ReactionField", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW4W4_F_avx_256_double, "nb_kernel_ElecRF_VdwLJ_GeomW4W4_F_avx_256_double", "avx_256_double", "ReactionField", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecRF_VdwNone_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecRF_VdwNone_GeomP1P1_VF_avx_256_double", "avx_256_double", "ReactionField", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwNone_GeomP1P1_F_avx_256_double, "nb_kernel_ElecRF_VdwNone_GeomP1P1_F_avx_256_double", "avx_256_double", "ReactionField", "None", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW3P1_VF_avx_256_double, "nb_kernel_ElecRF_VdwNone_GeomW3P1_VF_avx_256_double", "avx_256_double", "ReactionField", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW3P1_F_avx_256_double, "nb_kernel_ElecRF_VdwNone_GeomW3P1_F_avx_256_double", "avx_256_double", "ReactionField", "None", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_avx_256_double, "nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_avx_256_double", "avx_256_double", "ReactionField", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW3W3_F_avx_256_double, "nb_kernel_ElecRF_VdwNone_GeomW3W3_F_avx_256_double", "avx_256_double", "ReactionField", "None", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW4P1_VF_avx_256_double, "nb_kernel_ElecRF_VdwNone_GeomW4P1_VF_avx_256_double", "avx_256_double", "ReactionField", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW4P1_F_avx_256_double, "nb_kernel_ElecRF_VdwNone_GeomW4P1_F_avx_256_double", "avx_256_double", "ReactionField", "None", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW4W4_VF_avx_256_double, "nb_kernel_ElecRF_VdwNone_GeomW4W4_VF_avx_256_double", "avx_256_double", "ReactionField", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW4W4_F_avx_256_double, "nb_kernel_ElecRF_VdwNone_GeomW4W4_F_avx_256_double", "avx_256_double", "ReactionField", "None", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomP1P1_VF_avx_256_double, "nb_kernel_ElecRF_VdwCSTab_GeomP1P1_VF_avx_256_double", "avx_256_double", "ReactionField", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomP1P1_F_avx_256_double, "nb_kernel_ElecRF_VdwCSTab_GeomP1P1_F_avx_256_double", "avx_256_double", "ReactionField", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW3P1_VF_avx_256_double, "nb_kernel_ElecRF_VdwCSTab_GeomW3P1_VF_avx_256_double", "avx_256_double", "ReactionField", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW3P1_F_avx_256_double, "nb_kernel_ElecRF_VdwCSTab_GeomW3P1_F_avx_256_double", "avx_256_double", "ReactionField", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW3W3_VF_avx_256_double, "nb_kernel_ElecRF_VdwCSTab_GeomW3W3_VF_avx_256_double", "avx_256_double", "ReactionField", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW3W3_F_avx_256_double, "nb_kernel_ElecRF_VdwCSTab_GeomW3W3_F_avx_256_double", "avx_256_double", "ReactionField", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_avx_256_double, "nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_avx_256_double", "avx_256_double", "ReactionField", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_avx_256_double, "nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_avx_256_double", "avx_256_double", "ReactionField", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_avx_256_double, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_avx_256_double", "avx_256_double", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_avx_256_double, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_avx_256_double", "avx_256_double", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" }
 +};
 +
 +int /* Entry count of kernellist_avx_256_double: byte size of the whole array divided by the byte size of its first element. */
++kernellist_avx_256_double_size = sizeof(kernellist_avx_256_double)/sizeof(kernellist_avx_256_double[0]);
 +
 +#endif
index 7d71b2ce66de5a631d42f0f51bd1615972911d08,0000000000000000000000000000000000000000..f316f44605247bdd8188431ab415f50d2faaaecf
mode 100644,000000..100644
--- /dev/null
@@@ -1,465 -1,0 +1,465 @@@
-     kernellist_avx_256_single[] =
 +/*
 + * Note: this file was generated by the Gromacs avx_256_single kernel generator.
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + * Copyright (c) 2001-2012, The GROMACS Development Team
 + *
 + * Gromacs is a library for molecular simulation and trajectory analysis,
 + * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
 + * a full list of developers and information, check out http://www.gromacs.org
 + *
 + * This program is free software; you can redistribute it and/or modify it under
 + * the terms of the GNU Lesser General Public License as published by the Free
 + * Software Foundation; either version 2 of the License, or (at your option) any
 + * later version.
 + *
 + * To help fund GROMACS development, we humbly ask that you cite
 + * the papers people have written on it - you can find them on the website.
 + */
 +#ifndef nb_kernel_avx_256_single_h
 +#define nb_kernel_avx_256_single_h
 +
 +#include "../nb_kernel.h"
 +
 +nb_kernel_t nb_kernel_ElecNone_VdwLJ_GeomP1P1_VF_avx_256_single; /* Declarations of the ElecNone kernels. Each name reappears as the first field and the name string of a kernellist_avx_256_single row; those rows carry "None", "None", then "LennardJones" for VdwLJ* (with "PotentialShift" for Sh, "PotentialSwitch" for Sw) or "CubicSplineTable" for VdwCSTab, "ParticleParticle" for GeomP1P1, and "PotentialAndForce"/"Force" for the _VF/_F suffix. */
 +nb_kernel_t nb_kernel_ElecNone_VdwLJ_GeomP1P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecNone_VdwLJSh_GeomP1P1_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecNone_VdwLJSh_GeomP1P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomP1P1_VF_avx_256_single; /* Declarations of the ElecEw kernels. The kernellist_avx_256_single rows visible for them carry "Ewald", "None"; the Geom part maps P1P1/W3P1/W3W3/W4P1/W4W4 to "ParticleParticle"/"Water3Particle"/"Water3Water3"/"Water4Particle"/"Water4Water4", and _VF/_F to "PotentialAndForce"/"Force". */
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomP1P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW3P1_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW3P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW3W3_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW4P1_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW4P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW4W4_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW4W4_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomP1P1_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomP1P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW3P1_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW3P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW3W3_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW3W3_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW4P1_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW4P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW4W4_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW4W4_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomP1P1_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomP1P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW3P1_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW3P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW3W3_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW3W3_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW4P1_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW4P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW4W4_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW4W4_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_VF_avx_256_single; /* Declarations of the ElecEwSh kernels; only VdwLJSh and VdwNone variants are declared. NOTE(review): their kernellist_avx_256_single rows are outside this excerpt; presumably Ewald with a potential-shift modifier -- confirm against the table. */
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomP1P1_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomP1P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW3P1_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW3P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW3W3_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW3W3_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW4P1_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW4P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW4W4_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW4W4_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_VF_avx_256_single; /* Declarations of the ElecEwSw kernels; only VdwLJSw and VdwNone variants are declared. NOTE(review): their kernellist_avx_256_single rows are outside this excerpt; presumably Ewald with a potential-switch modifier -- confirm against the table. */
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomP1P1_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomP1P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW3P1_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW3P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW3W3_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW3W3_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_avx_256_single; /* Declarations of the ElecCoul kernels (VdwLJ, VdwNone and VdwCSTab variants, five geometries each, _VF and _F). NOTE(review): their kernellist_avx_256_single rows are outside this excerpt; presumably plain Coulomb electrostatics -- confirm against the table. */
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_avx_256_single; /* Declarations of the ElecCSTab kernels (VdwLJ, VdwNone and VdwCSTab variants). NOTE(review): their kernellist_avx_256_single rows are outside this excerpt; the visible rows tag VdwCSTab as "CubicSplineTable", so ElecCSTab is presumably the cubic-spline-table electrostatics counterpart -- confirm against the table. */
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_avx_256_single; /* Declarations of the ElecGB kernels; only GeomP1P1 variants are declared for this family. NOTE(review): their kernellist_avx_256_single rows are outside this excerpt; presumably generalized Born -- confirm against the table. */
 +nb_kernel_t nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecGB_VdwNone_GeomP1P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_avx_256_single; /* Declarations of the ElecRFCut kernels (VdwLJSh, VdwLJSw, VdwNone and VdwCSTab variants). The avx_256_double table earlier in this diff tags the same-named double kernels "ReactionField", "ExactCutoff". NOTE(review): the single-precision rows are outside this excerpt; presumably tagged the same -- confirm. */
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomP1P1_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomP1P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW3P1_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW3P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW3W3_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW3W3_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW4P1_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW4P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW4W4_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW4W4_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomP1P1_VF_avx_256_single; /* Declarations of the ElecRF kernels (VdwLJ, VdwNone and VdwCSTab variants). The avx_256_double table earlier in this diff tags the same-named double kernels "ReactionField", "None". NOTE(review): the single-precision rows are outside this excerpt; presumably tagged the same -- confirm. */
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomP1P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW3P1_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW3P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW3W3_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW3W3_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW4P1_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW4P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW4W4_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW4W4_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomP1P1_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomP1P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW3P1_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW3P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW3W3_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW4P1_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW4P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW4W4_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW4W4_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomP1P1_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomP1P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW3P1_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW3P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW3W3_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW3W3_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_avx_256_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_avx_256_single;
 +
 +
 +nb_kernel_info_t
-     kernellist_avx_256_single_size = sizeof(kernellist_avx_256_single)/sizeof(kernellist_avx_256_single[0]);
++kernellist_avx_256_single[] =
 +{
 +    { nb_kernel_ElecNone_VdwLJ_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecNone_VdwLJ_GeomP1P1_VF_avx_256_single", "avx_256_single", "None", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecNone_VdwLJ_GeomP1P1_F_avx_256_single, "nb_kernel_ElecNone_VdwLJ_GeomP1P1_F_avx_256_single", "avx_256_single", "None", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecNone_VdwLJSh_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecNone_VdwLJSh_GeomP1P1_VF_avx_256_single", "avx_256_single", "None", "None", "LennardJones", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecNone_VdwLJSh_GeomP1P1_F_avx_256_single, "nb_kernel_ElecNone_VdwLJSh_GeomP1P1_F_avx_256_single", "avx_256_single", "None", "None", "LennardJones", "PotentialShift", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_avx_256_single", "avx_256_single", "None", "None", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_avx_256_single, "nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_avx_256_single", "avx_256_single", "None", "None", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_avx_256_single", "avx_256_single", "None", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_avx_256_single, "nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_avx_256_single", "avx_256_single", "None", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecEw_VdwLJ_GeomP1P1_VF_avx_256_single", "avx_256_single", "Ewald", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomP1P1_F_avx_256_single, "nb_kernel_ElecEw_VdwLJ_GeomP1P1_F_avx_256_single", "avx_256_single", "Ewald", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW3P1_VF_avx_256_single, "nb_kernel_ElecEw_VdwLJ_GeomW3P1_VF_avx_256_single", "avx_256_single", "Ewald", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW3P1_F_avx_256_single, "nb_kernel_ElecEw_VdwLJ_GeomW3P1_F_avx_256_single", "avx_256_single", "Ewald", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_avx_256_single, "nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_avx_256_single", "avx_256_single", "Ewald", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW3W3_F_avx_256_single, "nb_kernel_ElecEw_VdwLJ_GeomW3W3_F_avx_256_single", "avx_256_single", "Ewald", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW4P1_VF_avx_256_single, "nb_kernel_ElecEw_VdwLJ_GeomW4P1_VF_avx_256_single", "avx_256_single", "Ewald", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW4P1_F_avx_256_single, "nb_kernel_ElecEw_VdwLJ_GeomW4P1_F_avx_256_single", "avx_256_single", "Ewald", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW4W4_VF_avx_256_single, "nb_kernel_ElecEw_VdwLJ_GeomW4W4_VF_avx_256_single", "avx_256_single", "Ewald", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW4W4_F_avx_256_single, "nb_kernel_ElecEw_VdwLJ_GeomW4W4_F_avx_256_single", "avx_256_single", "Ewald", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecEw_VdwNone_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecEw_VdwNone_GeomP1P1_VF_avx_256_single", "avx_256_single", "Ewald", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwNone_GeomP1P1_F_avx_256_single, "nb_kernel_ElecEw_VdwNone_GeomP1P1_F_avx_256_single", "avx_256_single", "Ewald", "None", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW3P1_VF_avx_256_single, "nb_kernel_ElecEw_VdwNone_GeomW3P1_VF_avx_256_single", "avx_256_single", "Ewald", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW3P1_F_avx_256_single, "nb_kernel_ElecEw_VdwNone_GeomW3P1_F_avx_256_single", "avx_256_single", "Ewald", "None", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW3W3_VF_avx_256_single, "nb_kernel_ElecEw_VdwNone_GeomW3W3_VF_avx_256_single", "avx_256_single", "Ewald", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW3W3_F_avx_256_single, "nb_kernel_ElecEw_VdwNone_GeomW3W3_F_avx_256_single", "avx_256_single", "Ewald", "None", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW4P1_VF_avx_256_single, "nb_kernel_ElecEw_VdwNone_GeomW4P1_VF_avx_256_single", "avx_256_single", "Ewald", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW4P1_F_avx_256_single, "nb_kernel_ElecEw_VdwNone_GeomW4P1_F_avx_256_single", "avx_256_single", "Ewald", "None", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW4W4_VF_avx_256_single, "nb_kernel_ElecEw_VdwNone_GeomW4W4_VF_avx_256_single", "avx_256_single", "Ewald", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW4W4_F_avx_256_single, "nb_kernel_ElecEw_VdwNone_GeomW4W4_F_avx_256_single", "avx_256_single", "Ewald", "None", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecEw_VdwCSTab_GeomP1P1_VF_avx_256_single", "avx_256_single", "Ewald", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomP1P1_F_avx_256_single, "nb_kernel_ElecEw_VdwCSTab_GeomP1P1_F_avx_256_single", "avx_256_single", "Ewald", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW3P1_VF_avx_256_single, "nb_kernel_ElecEw_VdwCSTab_GeomW3P1_VF_avx_256_single", "avx_256_single", "Ewald", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW3P1_F_avx_256_single, "nb_kernel_ElecEw_VdwCSTab_GeomW3P1_F_avx_256_single", "avx_256_single", "Ewald", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW3W3_VF_avx_256_single, "nb_kernel_ElecEw_VdwCSTab_GeomW3W3_VF_avx_256_single", "avx_256_single", "Ewald", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW3W3_F_avx_256_single, "nb_kernel_ElecEw_VdwCSTab_GeomW3W3_F_avx_256_single", "avx_256_single", "Ewald", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW4P1_VF_avx_256_single, "nb_kernel_ElecEw_VdwCSTab_GeomW4P1_VF_avx_256_single", "avx_256_single", "Ewald", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW4P1_F_avx_256_single, "nb_kernel_ElecEw_VdwCSTab_GeomW4P1_F_avx_256_single", "avx_256_single", "Ewald", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW4W4_VF_avx_256_single, "nb_kernel_ElecEw_VdwCSTab_GeomW4W4_VF_avx_256_single", "avx_256_single", "Ewald", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW4W4_F_avx_256_single, "nb_kernel_ElecEw_VdwCSTab_GeomW4W4_F_avx_256_single", "avx_256_single", "Ewald", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_VF_avx_256_single", "avx_256_single", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_F_avx_256_single, "nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_F_avx_256_single", "avx_256_single", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_VF_avx_256_single, "nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_VF_avx_256_single", "avx_256_single", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_F_avx_256_single, "nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_F_avx_256_single", "avx_256_single", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_VF_avx_256_single, "nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_VF_avx_256_single", "avx_256_single", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_F_avx_256_single, "nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_F_avx_256_single", "avx_256_single", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_VF_avx_256_single, "nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_VF_avx_256_single", "avx_256_single", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_F_avx_256_single, "nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_F_avx_256_single", "avx_256_single", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_VF_avx_256_single, "nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_VF_avx_256_single", "avx_256_single", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_F_avx_256_single, "nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_F_avx_256_single", "avx_256_single", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecEwSh_VdwNone_GeomP1P1_VF_avx_256_single", "avx_256_single", "Ewald", "PotentialShift", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomP1P1_F_avx_256_single, "nb_kernel_ElecEwSh_VdwNone_GeomP1P1_F_avx_256_single", "avx_256_single", "Ewald", "PotentialShift", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW3P1_VF_avx_256_single, "nb_kernel_ElecEwSh_VdwNone_GeomW3P1_VF_avx_256_single", "avx_256_single", "Ewald", "PotentialShift", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW3P1_F_avx_256_single, "nb_kernel_ElecEwSh_VdwNone_GeomW3P1_F_avx_256_single", "avx_256_single", "Ewald", "PotentialShift", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW3W3_VF_avx_256_single, "nb_kernel_ElecEwSh_VdwNone_GeomW3W3_VF_avx_256_single", "avx_256_single", "Ewald", "PotentialShift", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW3W3_F_avx_256_single, "nb_kernel_ElecEwSh_VdwNone_GeomW3W3_F_avx_256_single", "avx_256_single", "Ewald", "PotentialShift", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW4P1_VF_avx_256_single, "nb_kernel_ElecEwSh_VdwNone_GeomW4P1_VF_avx_256_single", "avx_256_single", "Ewald", "PotentialShift", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW4P1_F_avx_256_single, "nb_kernel_ElecEwSh_VdwNone_GeomW4P1_F_avx_256_single", "avx_256_single", "Ewald", "PotentialShift", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW4W4_VF_avx_256_single, "nb_kernel_ElecEwSh_VdwNone_GeomW4W4_VF_avx_256_single", "avx_256_single", "Ewald", "PotentialShift", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW4W4_F_avx_256_single, "nb_kernel_ElecEwSh_VdwNone_GeomW4W4_F_avx_256_single", "avx_256_single", "Ewald", "PotentialShift", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_VF_avx_256_single", "avx_256_single", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_F_avx_256_single, "nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_F_avx_256_single", "avx_256_single", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_VF_avx_256_single, "nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_VF_avx_256_single", "avx_256_single", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_F_avx_256_single, "nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_F_avx_256_single", "avx_256_single", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_VF_avx_256_single, "nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_VF_avx_256_single", "avx_256_single", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_F_avx_256_single, "nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_F_avx_256_single", "avx_256_single", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_VF_avx_256_single, "nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_VF_avx_256_single", "avx_256_single", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_F_avx_256_single, "nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_F_avx_256_single", "avx_256_single", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_VF_avx_256_single, "nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_VF_avx_256_single", "avx_256_single", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_F_avx_256_single, "nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_F_avx_256_single", "avx_256_single", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecEwSw_VdwNone_GeomP1P1_VF_avx_256_single", "avx_256_single", "Ewald", "PotentialSwitch", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomP1P1_F_avx_256_single, "nb_kernel_ElecEwSw_VdwNone_GeomP1P1_F_avx_256_single", "avx_256_single", "Ewald", "PotentialSwitch", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW3P1_VF_avx_256_single, "nb_kernel_ElecEwSw_VdwNone_GeomW3P1_VF_avx_256_single", "avx_256_single", "Ewald", "PotentialSwitch", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW3P1_F_avx_256_single, "nb_kernel_ElecEwSw_VdwNone_GeomW3P1_F_avx_256_single", "avx_256_single", "Ewald", "PotentialSwitch", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW3W3_VF_avx_256_single, "nb_kernel_ElecEwSw_VdwNone_GeomW3W3_VF_avx_256_single", "avx_256_single", "Ewald", "PotentialSwitch", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW3W3_F_avx_256_single, "nb_kernel_ElecEwSw_VdwNone_GeomW3W3_F_avx_256_single", "avx_256_single", "Ewald", "PotentialSwitch", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_avx_256_single, "nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_avx_256_single", "avx_256_single", "Ewald", "PotentialSwitch", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_avx_256_single, "nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_avx_256_single", "avx_256_single", "Ewald", "PotentialSwitch", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_avx_256_single, "nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_avx_256_single", "avx_256_single", "Ewald", "PotentialSwitch", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_avx_256_single, "nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_avx_256_single", "avx_256_single", "Ewald", "PotentialSwitch", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_avx_256_single, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_avx_256_single", "avx_256_single", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_avx_256_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_avx_256_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_avx_256_single", "avx_256_single", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_avx_256_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_avx_256_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_avx_256_single", "avx_256_single", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_avx_256_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_avx_256_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_avx_256_single", "avx_256_single", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_avx_256_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_avx_256_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_avx_256_single", "avx_256_single", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_avx_256_single, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_avx_256_single", "avx_256_single", "Coulomb", "None", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_avx_256_single, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_avx_256_single, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_avx_256_single", "avx_256_single", "Coulomb", "None", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_avx_256_single, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_avx_256_single, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_avx_256_single", "avx_256_single", "Coulomb", "None", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_avx_256_single, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_avx_256_single, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_avx_256_single", "avx_256_single", "Coulomb", "None", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_avx_256_single, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_avx_256_single, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_avx_256_single", "avx_256_single", "Coulomb", "None", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_avx_256_single, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_avx_256_single", "avx_256_single", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_avx_256_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_avx_256_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_avx_256_single", "avx_256_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_avx_256_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_avx_256_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_avx_256_single", "avx_256_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_avx_256_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_avx_256_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_avx_256_single", "avx_256_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_avx_256_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_avx_256_single", "avx_256_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_avx_256_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_avx_256_single", "avx_256_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_avx_256_single, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_avx_256_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_avx_256_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_avx_256_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_avx_256_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_avx_256_single, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_avx_256_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_avx_256_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_avx_256_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_avx_256_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_avx_256_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_avx_256_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_avx_256_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_avx_256_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_avx_256_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_avx_256_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_avx_256_single", "avx_256_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_avx_256_single", "avx_256_single", "GeneralizedBorn", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_avx_256_single, "nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_avx_256_single", "avx_256_single", "GeneralizedBorn", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_avx_256_single", "avx_256_single", "GeneralizedBorn", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecGB_VdwNone_GeomP1P1_F_avx_256_single, "nb_kernel_ElecGB_VdwNone_GeomP1P1_F_avx_256_single", "avx_256_single", "GeneralizedBorn", "None", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_avx_256_single", "avx_256_single", "GeneralizedBorn", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_avx_256_single, "nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_avx_256_single", "avx_256_single", "GeneralizedBorn", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_avx_256_single", "avx_256_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_avx_256_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_avx_256_single", "avx_256_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_avx_256_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_avx_256_single", "avx_256_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_F_avx_256_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_F_avx_256_single", "avx_256_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_VF_avx_256_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_VF_avx_256_single", "avx_256_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_F_avx_256_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_F_avx_256_single", "avx_256_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_VF_avx_256_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_VF_avx_256_single", "avx_256_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_F_avx_256_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_F_avx_256_single", "avx_256_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_avx_256_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_avx_256_single", "avx_256_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_avx_256_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_avx_256_single", "avx_256_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_VF_avx_256_single", "avx_256_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_F_avx_256_single, "nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_F_avx_256_single", "avx_256_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_VF_avx_256_single, "nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_VF_avx_256_single", "avx_256_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_F_avx_256_single, "nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_F_avx_256_single", "avx_256_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_VF_avx_256_single, "nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_VF_avx_256_single", "avx_256_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_F_avx_256_single, "nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_F_avx_256_single", "avx_256_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_VF_avx_256_single, "nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_VF_avx_256_single", "avx_256_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_F_avx_256_single, "nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_F_avx_256_single", "avx_256_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_VF_avx_256_single, "nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_VF_avx_256_single", "avx_256_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_F_avx_256_single, "nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_F_avx_256_single", "avx_256_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecRFCut_VdwNone_GeomP1P1_VF_avx_256_single", "avx_256_single", "ReactionField", "ExactCutoff", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomP1P1_F_avx_256_single, "nb_kernel_ElecRFCut_VdwNone_GeomP1P1_F_avx_256_single", "avx_256_single", "ReactionField", "ExactCutoff", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW3P1_VF_avx_256_single, "nb_kernel_ElecRFCut_VdwNone_GeomW3P1_VF_avx_256_single", "avx_256_single", "ReactionField", "ExactCutoff", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW3P1_F_avx_256_single, "nb_kernel_ElecRFCut_VdwNone_GeomW3P1_F_avx_256_single", "avx_256_single", "ReactionField", "ExactCutoff", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW3W3_VF_avx_256_single, "nb_kernel_ElecRFCut_VdwNone_GeomW3W3_VF_avx_256_single", "avx_256_single", "ReactionField", "ExactCutoff", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW3W3_F_avx_256_single, "nb_kernel_ElecRFCut_VdwNone_GeomW3W3_F_avx_256_single", "avx_256_single", "ReactionField", "ExactCutoff", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW4P1_VF_avx_256_single, "nb_kernel_ElecRFCut_VdwNone_GeomW4P1_VF_avx_256_single", "avx_256_single", "ReactionField", "ExactCutoff", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW4P1_F_avx_256_single, "nb_kernel_ElecRFCut_VdwNone_GeomW4P1_F_avx_256_single", "avx_256_single", "ReactionField", "ExactCutoff", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW4W4_VF_avx_256_single, "nb_kernel_ElecRFCut_VdwNone_GeomW4W4_VF_avx_256_single", "avx_256_single", "ReactionField", "ExactCutoff", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW4W4_F_avx_256_single, "nb_kernel_ElecRFCut_VdwNone_GeomW4W4_F_avx_256_single", "avx_256_single", "ReactionField", "ExactCutoff", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_VF_avx_256_single", "avx_256_single", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_F_avx_256_single, "nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_F_avx_256_single", "avx_256_single", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_VF_avx_256_single, "nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_VF_avx_256_single", "avx_256_single", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_F_avx_256_single, "nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_F_avx_256_single", "avx_256_single", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_VF_avx_256_single, "nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_VF_avx_256_single", "avx_256_single", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_F_avx_256_single, "nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_F_avx_256_single", "avx_256_single", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_VF_avx_256_single, "nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_VF_avx_256_single", "avx_256_single", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_F_avx_256_single, "nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_F_avx_256_single", "avx_256_single", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_VF_avx_256_single, "nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_VF_avx_256_single", "avx_256_single", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_F_avx_256_single, "nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_F_avx_256_single", "avx_256_single", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecRF_VdwLJ_GeomP1P1_VF_avx_256_single", "avx_256_single", "ReactionField", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomP1P1_F_avx_256_single, "nb_kernel_ElecRF_VdwLJ_GeomP1P1_F_avx_256_single", "avx_256_single", "ReactionField", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW3P1_VF_avx_256_single, "nb_kernel_ElecRF_VdwLJ_GeomW3P1_VF_avx_256_single", "avx_256_single", "ReactionField", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW3P1_F_avx_256_single, "nb_kernel_ElecRF_VdwLJ_GeomW3P1_F_avx_256_single", "avx_256_single", "ReactionField", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW3W3_VF_avx_256_single, "nb_kernel_ElecRF_VdwLJ_GeomW3W3_VF_avx_256_single", "avx_256_single", "ReactionField", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW3W3_F_avx_256_single, "nb_kernel_ElecRF_VdwLJ_GeomW3W3_F_avx_256_single", "avx_256_single", "ReactionField", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW4P1_VF_avx_256_single, "nb_kernel_ElecRF_VdwLJ_GeomW4P1_VF_avx_256_single", "avx_256_single", "ReactionField", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW4P1_F_avx_256_single, "nb_kernel_ElecRF_VdwLJ_GeomW4P1_F_avx_256_single", "avx_256_single", "ReactionField", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW4W4_VF_avx_256_single, "nb_kernel_ElecRF_VdwLJ_GeomW4W4_VF_avx_256_single", "avx_256_single", "ReactionField", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW4W4_F_avx_256_single, "nb_kernel_ElecRF_VdwLJ_GeomW4W4_F_avx_256_single", "avx_256_single", "ReactionField", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecRF_VdwNone_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecRF_VdwNone_GeomP1P1_VF_avx_256_single", "avx_256_single", "ReactionField", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwNone_GeomP1P1_F_avx_256_single, "nb_kernel_ElecRF_VdwNone_GeomP1P1_F_avx_256_single", "avx_256_single", "ReactionField", "None", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW3P1_VF_avx_256_single, "nb_kernel_ElecRF_VdwNone_GeomW3P1_VF_avx_256_single", "avx_256_single", "ReactionField", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW3P1_F_avx_256_single, "nb_kernel_ElecRF_VdwNone_GeomW3P1_F_avx_256_single", "avx_256_single", "ReactionField", "None", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_avx_256_single, "nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_avx_256_single", "avx_256_single", "ReactionField", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW3W3_F_avx_256_single, "nb_kernel_ElecRF_VdwNone_GeomW3W3_F_avx_256_single", "avx_256_single", "ReactionField", "None", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW4P1_VF_avx_256_single, "nb_kernel_ElecRF_VdwNone_GeomW4P1_VF_avx_256_single", "avx_256_single", "ReactionField", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW4P1_F_avx_256_single, "nb_kernel_ElecRF_VdwNone_GeomW4P1_F_avx_256_single", "avx_256_single", "ReactionField", "None", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW4W4_VF_avx_256_single, "nb_kernel_ElecRF_VdwNone_GeomW4W4_VF_avx_256_single", "avx_256_single", "ReactionField", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW4W4_F_avx_256_single, "nb_kernel_ElecRF_VdwNone_GeomW4W4_F_avx_256_single", "avx_256_single", "ReactionField", "None", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomP1P1_VF_avx_256_single, "nb_kernel_ElecRF_VdwCSTab_GeomP1P1_VF_avx_256_single", "avx_256_single", "ReactionField", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomP1P1_F_avx_256_single, "nb_kernel_ElecRF_VdwCSTab_GeomP1P1_F_avx_256_single", "avx_256_single", "ReactionField", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW3P1_VF_avx_256_single, "nb_kernel_ElecRF_VdwCSTab_GeomW3P1_VF_avx_256_single", "avx_256_single", "ReactionField", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW3P1_F_avx_256_single, "nb_kernel_ElecRF_VdwCSTab_GeomW3P1_F_avx_256_single", "avx_256_single", "ReactionField", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW3W3_VF_avx_256_single, "nb_kernel_ElecRF_VdwCSTab_GeomW3W3_VF_avx_256_single", "avx_256_single", "ReactionField", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW3W3_F_avx_256_single, "nb_kernel_ElecRF_VdwCSTab_GeomW3W3_F_avx_256_single", "avx_256_single", "ReactionField", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_avx_256_single, "nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_avx_256_single", "avx_256_single", "ReactionField", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_avx_256_single, "nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_avx_256_single", "avx_256_single", "ReactionField", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_avx_256_single, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_avx_256_single", "avx_256_single", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_avx_256_single, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_avx_256_single", "avx_256_single", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" }
 +};
 +/*
 + * Number of entries in kernellist_avx_256_single, computed at compile time
 + * as (size of the whole array) / (size of its first element), so it follows
 + * the initializer list automatically when entries are added or removed.
 + */
 +int
++kernellist_avx_256_single_size = sizeof(kernellist_avx_256_single)/sizeof(kernellist_avx_256_single[0]);
 +
 +#endif
index bddc5873d4eba690cd3369d5cf54c22f8c7312f0,0000000000000000000000000000000000000000..23ba2990df4d02737f0ab28b357671bdefa8fe8d
mode 100644,000000..100644
--- /dev/null
@@@ -1,490 -1,0 +1,487 @@@
-         printf("inr=%d\n",inr);
 +/*
 + * Note: this file was generated by the Gromacs c kernel generator.
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + * Copyright (c) 2001-2012, The GROMACS Development Team
 + *
 + * Gromacs is a library for molecular simulation and trajectory analysis,
 + * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
 + * a full list of developers and information, check out http://www.gromacs.org
 + *
 + * This program is free software; you can redistribute it and/or modify it under
 + * the terms of the GNU Lesser General Public License as published by the Free
 + * Software Foundation; either version 2 of the License, or (at your option) any
 + * later version.
 + *
 + * To help fund GROMACS development, we humbly ask that you cite
 + * the papers people have written on it - you can find them on the website.
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <math.h>
 +
 +#include "../nb_kernel.h"
 +#include "types/simple.h"
 +#include "vec.h"
 +#include "nrnb.h"
 +
 +/*
 + * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_c
 + * Electrostatics interaction: GeneralizedBorn
 + * VdW interaction:            LennardJones
 + * Geometry:                   Particle-Particle
 + * Calculate force/pot:        PotentialAndForce
 + *
 + * Walks the neighbor list and, for every i-j pair in it, evaluates plain
 + * Coulomb, a tabulated Generalized Born term and Lennard-Jones. No cutoff
 + * test is applied inside the kernel: every listed pair contributes.
 + *
 + * Parameters (as used by the code below):
 + *   nlist       - neighbor list; reads nri, iinr, jindex, jjnr, shift, gid.
 + *   xx          - atom coordinates, read through a flat pointer to xx[0].
 + *   ff          - atom forces, updated in place for both i and j atoms.
 + *   fr          - reads shift_vec, epsfac, ntype, nbfp, invsqrta, gbtab,
 + *                 epsilon_r and gb_epsilon_solvent; accumulates into fshift
 + *                 and dvda.
 + *   mdatoms     - reads chargeA and typeA.
 + *   kernel_data - energygrp_elec, energygrp_polarization and energygrp_vdw
 + *                 are incremented at the energy-group index gid[iidx].
 + *   nrnb        - flop accounting, updated once at the end via inc_nrnb().
 + *
 + * Several declared locals (rcutoff, crf, krf, vftab, fjx0, ...) are never
 + * used here; presumably the generator emits one common declaration set for
 + * all kernel flavors.
 + */
 +void
 +nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_c
 +                    (t_nblist * gmx_restrict                nlist,
 +                     rvec * gmx_restrict                    xx,
 +                     rvec * gmx_restrict                    ff,
 +                     t_forcerec * gmx_restrict              fr,
 +                     t_mdatoms * gmx_restrict               mdatoms,
 +                     nb_kernel_data_t * gmx_restrict        kernel_data,
 +                     t_nrnb * gmx_restrict                  nrnb)
 +{
 +    int              i_shift_offset,i_coord_offset,j_coord_offset;
 +    int              j_index_start,j_index_end;
 +    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
 +    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
 +    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
 +    real             *shiftvec,*fshift,*x,*f;
 +    int              vdwioffset0;
 +    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
 +    int              vdwjidx0;
 +    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
 +    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
 +    real             velec,felec,velecsum,facel,crf,krf,krf2;
 +    real             *charge;
 +    int              gbitab;
 +    real             vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp;
 +    real             *invsqrta,*dvda,*gbtab;
 +    int              nvdwtype;
 +    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
 +    int              *vdwtype;
 +    real             *vdwparam;
 +    int              vfitab;
 +    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
 +    real             *vftab;
 +
 +    x                = xx[0]; /* flat view: atom a is at x[DIM*a+XX], +YY, +ZZ */
 +    f                = ff[0]; /* flat view of the force array, same indexing as x */
 +
 +    nri              = nlist->nri;
 +    iinr             = nlist->iinr;
 +    jindex           = nlist->jindex;
 +    jjnr             = nlist->jjnr;
 +    shiftidx         = nlist->shift;
 +    gid              = nlist->gid;
 +    shiftvec         = fr->shift_vec[0];
 +    fshift           = fr->fshift[0];
 +    facel            = fr->epsfac;
 +    charge           = mdatoms->chargeA;
 +    nvdwtype         = fr->ntype;
 +    vdwparam         = fr->nbfp;
 +    vdwtype          = mdatoms->typeA;
 +
 +    invsqrta         = fr->invsqrta;
 +    dvda             = fr->dvda;
 +    gbtabscale       = fr->gbtab.scale;
 +    gbtab            = fr->gbtab.data;
 +    gbinvepsdiff     = (1.0/fr->epsilon_r) - (1.0/fr->gb_epsilon_solvent); /* prefactor of the GB pair term, see gbqqfactor */
 +
 +    outeriter        = 0;
 +    inneriter        = 0;
 +
 +    /* Start outer loop over neighborlists: one iteration per i entry */
 +    for(iidx=0; iidx<nri; iidx++)
 +    {
 +        /* Load shift vector for this list */
 +        i_shift_offset   = DIM*shiftidx[iidx];
 +        shX              = shiftvec[i_shift_offset+XX];
 +        shY              = shiftvec[i_shift_offset+YY];
 +        shZ              = shiftvec[i_shift_offset+ZZ];
 +
 +        /* Load limits for loop over neighbors: the j atoms of entry iidx
 +         * are jjnr[j_index_start] .. jjnr[j_index_end-1].
 +         */
 +        j_index_start    = jindex[iidx];
 +        j_index_end      = jindex[iidx+1];
 +
 +        /* Get outer coordinate index */
 +        inr              = iinr[iidx];
 +        i_coord_offset   = DIM*inr;
 +
 +        /* Load i particle coords and add shift vector */
 +        ix0              = shX + x[i_coord_offset+DIM*0+XX];
 +        iy0              = shY + x[i_coord_offset+DIM*0+YY];
 +        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
 +
 +        fix0             = 0.0;
 +        fiy0             = 0.0;
 +        fiz0             = 0.0;
 +
 +        /* Load parameters for i particles.
 +         * The i charge is pre-multiplied by facel so that qq00 = iq0*jq0
 +         * already carries that factor. vdwioffset0 selects the row of
 +         * vdwparam for the i atom type: two values per type pair and
 +         * nvdwtype pairs per row.
 +         */
 +        iq0              = facel*charge[inr+0];
 +        isai0            = invsqrta[inr+0];
 +        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
 +
 +        /* Reset potential sums (accumulated per i entry, flushed after the inner loop) */
 +        velecsum         = 0.0;
 +        vgbsum           = 0.0;
 +        vvdwsum          = 0.0;
 +        dvdasum          = 0.0;
-             printf("  jnr=%d  fgb=%g\n",jnr,fgb);
++
 +        /* Start inner kernel loop */
 +        for(jidx=j_index_start; jidx<j_index_end; jidx++)
 +        {
 +            /* Get j neighbor index, and coordinate index */
 +            jnr              = jjnr[jidx];
 +            j_coord_offset   = DIM*jnr;
 +
 +            /* load j atom coordinates */
 +            jx0              = x[j_coord_offset+DIM*0+XX];
 +            jy0              = x[j_coord_offset+DIM*0+YY];
 +            jz0              = x[j_coord_offset+DIM*0+ZZ];
 +
 +            /* Calculate displacement vector (shifted i minus j) */
 +            dx00             = ix0 - jx0;
 +            dy00             = iy0 - jy0;
 +            dz00             = iz0 - jz0;
 +
 +            /* Calculate squared distance and things based on it */
 +            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
 +
 +            rinv00           = gmx_invsqrt(rsq00); /* presumably 1/sqrt(rsq00); r00 below relies on that - TODO confirm */
 +
 +            rinvsq00         = rinv00*rinv00;
 +
 +            /* Load parameters for j particles */
 +            jq0              = charge[jnr+0];
 +            isaj0           = invsqrta[jnr+0];
 +            vdwjidx0         = 2*vdwtype[jnr+0];
 +
 +            /**************************
 +             * CALCULATE INTERACTIONS *
 +             **************************/
 +
 +            r00              = rsq00*rinv00; /* r = r^2 * (1/r) */
 +
 +            qq00             = iq0*jq0;
 +            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
 +            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
 +
 +            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
 +            isaprod          = isai0*isaj0;
 +            gbqqfactor       = isaprod*(-qq00)*gbinvepsdiff;
 +            gbscale          = isaprod*gbtabscale;
 +            dvdaj            = dvda[jnr+0];
 +
 +            /* Calculate generalized born table index - this is a separate table from the normal one,
 +             * but we use the same procedure by multiplying r with scale and truncating to integer.
 +             * Note that the scale is pair dependent (gbscale = isaprod*gbtabscale).
 +             */
 +            rt               = r00*gbscale;
 +            gbitab           = rt; /* implicit real->int conversion does the truncation */
 +            gbeps            = rt-gbitab; /* fractional position inside the table bin */
 +            gbitab           = 4*gbitab; /* four table values (Y,F,G,H) are read per bin */
 +
 +            Y                = gbtab[gbitab];
 +            F                = gbtab[gbitab+1];
 +            Geps             = gbeps*gbtab[gbitab+2];
 +            Heps2            = gbeps*gbeps*gbtab[gbitab+3];
 +            Fp               = F+Geps+Heps2;
 +            VV               = Y+gbeps*Fp; /* Y + eps*F + eps^2*G + eps^3*H */
 +            vgb              = gbqqfactor*VV;
 +
 +            FF               = Fp+Geps+2.0*Heps2; /* F + 2*eps*G + 3*eps^2*H, i.e. dVV/d(eps) */
 +            fgb              = gbqqfactor*FF*gbscale; /* gbscale converts the table-coordinate derivative to d/dr (rt = r00*gbscale) */
-             printf("  dvdatmp=%g\n",dvdatmp);
 +            dvdatmp          = -0.5*(vgb+fgb*r00);
 +            dvdasum          = dvdasum + dvdatmp;
-             printf("  dvda, jcontrib=%g\n",dvdatmp*isaj0*isaj0);
 +            dvda[jnr]        = dvdaj+dvdatmp*isaj0*isaj0; /* j share is written immediately; the i share is added after the inner loop */
 +            velec            = qq00*rinv00;
 +            felec            = (velec*rinv00-fgb)*rinv00; /* (qq/r^2 - fgb)/r: scalar that multiplies the displacement vector */
 +
 +            /* LENNARD-JONES DISPERSION/REPULSION.
 +             * NOTE(review): the 1/12 and 1/6 factors in vvdw, together with
 +             * their absence in fvdw, suggest that vdwparam stores 6*C6 and
 +             * 12*C12 - confirm against the code that fills fr->nbfp.
 +             */
 +
 +            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
 +            vvdw6            = c6_00*rinvsix;
 +            vvdw12           = c12_00*rinvsix*rinvsix;
 +            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
 +            fvdw             = (vvdw12-vvdw6)*rinvsq00;
 +
 +            /* Add this pair's energies to the running sums for the current i entry */
 +            velecsum        += velec;
 +            vgbsum          += vgb;
 +            vvdwsum         += vvdw;
 +
 +            fscal            = felec+fvdw;
 +
 +            /* Calculate temporary vectorial force */
 +            tx               = fscal*dx00;
 +            ty               = fscal*dy00;
 +            tz               = fscal*dz00;
 +
 +            /* Update vectorial force: +t is accumulated for i, -t is applied to j right away */
 +            fix0            += tx;
 +            fiy0            += ty;
 +            fiz0            += tz;
 +            f[j_coord_offset+DIM*0+XX] -= tx;
 +            f[j_coord_offset+DIM*0+YY] -= ty;
 +            f[j_coord_offset+DIM*0+ZZ] -= tz;
 +
 +            /* Inner loop uses 71 flops */
 +        }
 +        /* End of innermost loop */
 +
 +        tx = ty = tz = 0; /* with a single i atom the shift-force sum below is just fix0/fiy0/fiz0 */
 +        f[i_coord_offset+DIM*0+XX] += fix0;
 +        f[i_coord_offset+DIM*0+YY] += fiy0;
 +        f[i_coord_offset+DIM*0+ZZ] += fiz0;
 +        tx                         += fix0;
 +        ty                         += fiy0;
 +        tz                         += fiz0;
 +        fshift[i_shift_offset+XX]  += tx;
 +        fshift[i_shift_offset+YY]  += ty;
 +        fshift[i_shift_offset+ZZ]  += tz;
 +
 +        ggid                        = gid[iidx];
 +        /* Update potential energies for the energy group of this i entry */
 +        kernel_data->energygrp_elec[ggid] += velecsum;
 +        kernel_data->energygrp_polarization[ggid] += vgbsum;
 +        kernel_data->energygrp_vdw[ggid] += vvdwsum;
 +        dvda[inr]                   = dvda[inr] + dvdasum*isai0*isai0;
 +
 +        /* Increment number of inner iterations */
 +        inneriter                  += j_index_end - j_index_start;
 +
 +        /* Outer loop uses 16 flops */
 +    }
 +
 +    /* Increment number of outer iterations */
 +    outeriter        += nri;
 +
 +    /* Update outer/inner flops, using the per-iteration counts quoted in the loop comments above */
 +
 +    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*16 + inneriter*71);
 +}
 +/*
 + * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_c
 + * Electrostatics interaction: GeneralizedBorn
 + * VdW interaction:            LennardJones
 + * Geometry:                   Particle-Particle
 + * Calculate force/pot:        Force
 + *
 + * Force-only kernel: walks the neighbor list and, for every i-j pair in
 + * it, evaluates the Coulomb, tabulated Generalized Born and Lennard-Jones
 + * forces. No energy-group sums are written and no cutoff test is applied.
 + *
 + * Parameters (as used by the code below):
 + *   nlist       - neighbor list; reads nri, iinr, jindex, jjnr and shift
 + *                 (gid is loaded into a local but never read).
 + *   xx          - atom coordinates, read through a flat pointer to xx[0].
 + *   ff          - atom forces, updated in place for both i and j atoms.
 + *   fr          - reads shift_vec, epsfac, ntype, nbfp, invsqrta, gbtab,
 + *                 epsilon_r and gb_epsilon_solvent; accumulates into fshift
 + *                 and dvda.
 + *   mdatoms     - reads chargeA and typeA.
 + *   kernel_data - not referenced by this kernel.
 + *   nrnb        - flop accounting, updated once at the end via inc_nrnb().
 + *
 + * Several declared locals (rcutoff, crf, krf, vftab, velecsum, ...) are
 + * never used here; presumably the generator emits one common declaration
 + * set for all kernel flavors.
 + */
 +void
 +nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_c
 +                    (t_nblist * gmx_restrict                nlist,
 +                     rvec * gmx_restrict                    xx,
 +                     rvec * gmx_restrict                    ff,
 +                     t_forcerec * gmx_restrict              fr,
 +                     t_mdatoms * gmx_restrict               mdatoms,
 +                     nb_kernel_data_t * gmx_restrict        kernel_data,
 +                     t_nrnb * gmx_restrict                  nrnb)
 +{
 +    int              i_shift_offset,i_coord_offset,j_coord_offset;
 +    int              j_index_start,j_index_end;
 +    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
 +    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
 +    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
 +    real             *shiftvec,*fshift,*x,*f;
 +    int              vdwioffset0;
 +    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
 +    int              vdwjidx0;
 +    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
 +    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
 +    real             velec,felec,velecsum,facel,crf,krf,krf2;
 +    real             *charge;
 +    int              gbitab;
 +    real             vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp;
 +    real             *invsqrta,*dvda,*gbtab;
 +    int              nvdwtype;
 +    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
 +    int              *vdwtype;
 +    real             *vdwparam;
 +    int              vfitab;
 +    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
 +    real             *vftab;
 +
 +    x                = xx[0]; /* flat view: atom a is at x[DIM*a+XX], +YY, +ZZ */
 +    f                = ff[0]; /* flat view of the force array, same indexing as x */
 +
 +    nri              = nlist->nri;
 +    iinr             = nlist->iinr;
 +    jindex           = nlist->jindex;
 +    jjnr             = nlist->jjnr;
 +    shiftidx         = nlist->shift;
 +    gid              = nlist->gid;
 +    shiftvec         = fr->shift_vec[0];
 +    fshift           = fr->fshift[0];
 +    facel            = fr->epsfac;
 +    charge           = mdatoms->chargeA;
 +    nvdwtype         = fr->ntype;
 +    vdwparam         = fr->nbfp;
 +    vdwtype          = mdatoms->typeA;
 +
 +    invsqrta         = fr->invsqrta;
 +    dvda             = fr->dvda;
 +    gbtabscale       = fr->gbtab.scale;
 +    gbtab            = fr->gbtab.data;
 +    gbinvepsdiff     = (1.0/fr->epsilon_r) - (1.0/fr->gb_epsilon_solvent); /* prefactor of the GB pair term, see gbqqfactor */
 +
 +    outeriter        = 0;
 +    inneriter        = 0;
 +
 +    /* Start outer loop over neighborlists: one iteration per i entry */
 +    for(iidx=0; iidx<nri; iidx++)
 +    {
 +        /* Load shift vector for this list */
 +        i_shift_offset   = DIM*shiftidx[iidx];
 +        shX              = shiftvec[i_shift_offset+XX];
 +        shY              = shiftvec[i_shift_offset+YY];
 +        shZ              = shiftvec[i_shift_offset+ZZ];
 +
 +        /* Load limits for loop over neighbors: the j atoms of entry iidx
 +         * are jjnr[j_index_start] .. jjnr[j_index_end-1].
 +         */
 +        j_index_start    = jindex[iidx];
 +        j_index_end      = jindex[iidx+1];
 +
 +        /* Get outer coordinate index */
 +        inr              = iinr[iidx];
 +        i_coord_offset   = DIM*inr;
 +
 +        /* Load i particle coords and add shift vector */
 +        ix0              = shX + x[i_coord_offset+DIM*0+XX];
 +        iy0              = shY + x[i_coord_offset+DIM*0+YY];
 +        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
 +
 +        fix0             = 0.0;
 +        fiy0             = 0.0;
 +        fiz0             = 0.0;
 +
 +        /* Load parameters for i particles.
 +         * The i charge is pre-multiplied by facel so that qq00 = iq0*jq0
 +         * already carries that factor. vdwioffset0 selects the row of
 +         * vdwparam for the i atom type: two values per type pair and
 +         * nvdwtype pairs per row.
 +         */
 +        iq0              = facel*charge[inr+0];
 +        isai0            = invsqrta[inr+0];
 +        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
 +
 +        dvdasum          = 0.0; /* the only per-i accumulator besides the force in this force-only kernel */
 +
 +        /* Start inner kernel loop */
 +        for(jidx=j_index_start; jidx<j_index_end; jidx++)
 +        {
 +            /* Get j neighbor index, and coordinate index */
 +            jnr              = jjnr[jidx];
 +            j_coord_offset   = DIM*jnr;
 +
 +            /* load j atom coordinates */
 +            jx0              = x[j_coord_offset+DIM*0+XX];
 +            jy0              = x[j_coord_offset+DIM*0+YY];
 +            jz0              = x[j_coord_offset+DIM*0+ZZ];
 +
 +            /* Calculate displacement vector (shifted i minus j) */
 +            dx00             = ix0 - jx0;
 +            dy00             = iy0 - jy0;
 +            dz00             = iz0 - jz0;
 +
 +            /* Calculate squared distance and things based on it */
 +            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
 +
 +            rinv00           = gmx_invsqrt(rsq00); /* presumably 1/sqrt(rsq00); r00 below relies on that - TODO confirm */
 +
 +            rinvsq00         = rinv00*rinv00;
 +
 +            /* Load parameters for j particles */
 +            jq0              = charge[jnr+0];
 +            isaj0           = invsqrta[jnr+0];
 +            vdwjidx0         = 2*vdwtype[jnr+0];
 +
 +            /**************************
 +             * CALCULATE INTERACTIONS *
 +             **************************/
 +
 +            r00              = rsq00*rinv00; /* r = r^2 * (1/r) */
 +
 +            qq00             = iq0*jq0;
 +            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
 +            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
 +
 +            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
 +            isaprod          = isai0*isaj0;
 +            gbqqfactor       = isaprod*(-qq00)*gbinvepsdiff;
 +            gbscale          = isaprod*gbtabscale;
 +            dvdaj            = dvda[jnr+0];
 +
 +            /* Calculate generalized born table index - this is a separate table from the normal one,
 +             * but we use the same procedure by multiplying r with scale and truncating to integer.
 +             * Note that the scale is pair dependent (gbscale = isaprod*gbtabscale).
 +             */
 +            rt               = r00*gbscale;
 +            gbitab           = rt; /* implicit real->int conversion does the truncation */
 +            gbeps            = rt-gbitab; /* fractional position inside the table bin */
 +            gbitab           = 4*gbitab; /* four table values (Y,F,G,H) are read per bin */
 +
 +            Y                = gbtab[gbitab];
 +            F                = gbtab[gbitab+1];
 +            Geps             = gbeps*gbtab[gbitab+2];
 +            Heps2            = gbeps*gbeps*gbtab[gbitab+3];
 +            Fp               = F+Geps+Heps2;
 +            VV               = Y+gbeps*Fp; /* Y + eps*F + eps^2*G + eps^3*H */
 +            vgb              = gbqqfactor*VV; /* still needed in the force-only kernel: it enters dvdatmp below */
 +
 +            FF               = Fp+Geps+2.0*Heps2; /* F + 2*eps*G + 3*eps^2*H, i.e. dVV/d(eps) */
 +            fgb              = gbqqfactor*FF*gbscale; /* gbscale converts the table-coordinate derivative to d/dr (rt = r00*gbscale) */
 +            dvdatmp          = -0.5*(vgb+fgb*r00);
 +            dvdasum          = dvdasum + dvdatmp;
 +            dvda[jnr]        = dvdaj+dvdatmp*isaj0*isaj0; /* j share is written immediately; the i share is added after the inner loop */
 +            velec            = qq00*rinv00;
 +            felec            = (velec*rinv00-fgb)*rinv00; /* (qq/r^2 - fgb)/r: scalar that multiplies the displacement vector */
 +
 +            /* LENNARD-JONES DISPERSION/REPULSION.
 +             * Only the force is formed here: (c12/r^12 - c6/r^6)/r^2.
 +             */
 +
 +            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
 +            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
 +
 +            fscal            = felec+fvdw;
 +
 +            /* Calculate temporary vectorial force */
 +            tx               = fscal*dx00;
 +            ty               = fscal*dy00;
 +            tz               = fscal*dz00;
 +
 +            /* Update vectorial force: +t is accumulated for i, -t is applied to j right away */
 +            fix0            += tx;
 +            fiy0            += ty;
 +            fiz0            += tz;
 +            f[j_coord_offset+DIM*0+XX] -= tx;
 +            f[j_coord_offset+DIM*0+YY] -= ty;
 +            f[j_coord_offset+DIM*0+ZZ] -= tz;
 +
 +            /* Inner loop uses 64 flops */
 +        }
 +        /* End of innermost loop */
 +
 +        tx = ty = tz = 0; /* with a single i atom the shift-force sum below is just fix0/fiy0/fiz0 */
 +        f[i_coord_offset+DIM*0+XX] += fix0;
 +        f[i_coord_offset+DIM*0+YY] += fiy0;
 +        f[i_coord_offset+DIM*0+ZZ] += fiz0;
 +        tx                         += fix0;
 +        ty                         += fiy0;
 +        tz                         += fiz0;
 +        fshift[i_shift_offset+XX]  += tx;
 +        fshift[i_shift_offset+YY]  += ty;
 +        fshift[i_shift_offset+ZZ]  += tz;
 +
 +        dvda[inr]                   = dvda[inr] + dvdasum*isai0*isai0;
 +
 +        /* Increment number of inner iterations */
 +        inneriter                  += j_index_end - j_index_start;
 +
 +        /* Outer loop uses 13 flops */
 +    }
 +
 +    /* Increment number of outer iterations */
 +    outeriter        += nri;
 +
 +    /* Update outer/inner flops, using the per-iteration counts quoted in the loop comments above */
 +
 +    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*13 + inneriter*64);
 +}
index 82a008c3c123dae0606b36578081c0920598f0b5,0000000000000000000000000000000000000000..c33cadd829e9c5d5cfd2d4a4b0190a48227cdf53
mode 100644,000000..100644
--- /dev/null
@@@ -1,641 -1,0 +1,641 @@@
-     kernellist_c[] =
 +/*
 + * Note: this file was generated by the Gromacs c kernel generator.
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + * Copyright (c) 2001-2012, The GROMACS Development Team
 + *
 + * Gromacs is a library for molecular simulation and trajectory analysis,
 + * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
 + * a full list of developers and information, check out http://www.gromacs.org
 + *
 + * This program is free software; you can redistribute it and/or modify it under
 + * the terms of the GNU Lesser General Public License as published by the Free
 + * Software Foundation; either version 2 of the License, or (at your option) any
 + * later version.
 + *
 + * To help fund GROMACS development, we humbly ask that you cite
 + * the papers people have written on it - you can find them on the website.
 + */
 +#ifndef nb_kernel_c_h
 +#define nb_kernel_c_h
 +
 +#include "../nb_kernel.h"
 +
 +nb_kernel_t nb_kernel_ElecNone_VdwLJ_GeomP1P1_VF_c; /* ElecNone_* (GeomP1P1 only): kernellist_c in this file pairs these with "None", "None" followed by the Vdw strings: LJ -> "LennardJones", CSTab -> "CubicSplineTable", Bham -> "Buckingham"; Sh/Sw suffix -> "PotentialShift"/"PotentialSwitch"; _VF/_F -> "PotentialAndForce"/"Force". */
 +nb_kernel_t nb_kernel_ElecNone_VdwLJ_GeomP1P1_F_c;
 +nb_kernel_t nb_kernel_ElecNone_VdwLJSh_GeomP1P1_VF_c;
 +nb_kernel_t nb_kernel_ElecNone_VdwLJSh_GeomP1P1_F_c;
 +nb_kernel_t nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_c;
 +nb_kernel_t nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_c;
 +nb_kernel_t nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_c;
 +nb_kernel_t nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_c;
 +nb_kernel_t nb_kernel_ElecNone_VdwBham_GeomP1P1_VF_c;
 +nb_kernel_t nb_kernel_ElecNone_VdwBham_GeomP1P1_F_c;
 +nb_kernel_t nb_kernel_ElecNone_VdwBhamSh_GeomP1P1_VF_c;
 +nb_kernel_t nb_kernel_ElecNone_VdwBhamSh_GeomP1P1_F_c;
 +nb_kernel_t nb_kernel_ElecNone_VdwBhamSw_GeomP1P1_VF_c;
 +nb_kernel_t nb_kernel_ElecNone_VdwBhamSw_GeomP1P1_F_c;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomP1P1_VF_c; /* ElecEw_*: kernellist_c in this file pairs these with "Ewald", "None"; Geom tags map to P1P1 -> "ParticleParticle", W3P1 -> "Water3Particle", W3W3 -> "Water3Water3", W4P1 -> "Water4Particle", W4W4 -> "Water4Water4". */
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomP1P1_F_c;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW3P1_VF_c;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW3P1_F_c;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_c;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW3W3_F_c;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW4P1_VF_c;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW4P1_F_c;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW4W4_VF_c;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW4W4_F_c;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomP1P1_VF_c;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomP1P1_F_c;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW3P1_VF_c;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW3P1_F_c;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW3W3_VF_c;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW3W3_F_c;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW4P1_VF_c;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW4P1_F_c;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW4W4_VF_c;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW4W4_F_c;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomP1P1_VF_c;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomP1P1_F_c;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW3P1_VF_c;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW3P1_F_c;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW3W3_VF_c;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW3W3_F_c;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW4P1_VF_c;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW4P1_F_c;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW4W4_VF_c;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW4W4_F_c;
 +nb_kernel_t nb_kernel_ElecEw_VdwBham_GeomP1P1_VF_c;
 +nb_kernel_t nb_kernel_ElecEw_VdwBham_GeomP1P1_F_c;
 +nb_kernel_t nb_kernel_ElecEw_VdwBham_GeomW3P1_VF_c;
 +nb_kernel_t nb_kernel_ElecEw_VdwBham_GeomW3P1_F_c;
 +nb_kernel_t nb_kernel_ElecEw_VdwBham_GeomW3W3_VF_c;
 +nb_kernel_t nb_kernel_ElecEw_VdwBham_GeomW3W3_F_c;
 +nb_kernel_t nb_kernel_ElecEw_VdwBham_GeomW4P1_VF_c;
 +nb_kernel_t nb_kernel_ElecEw_VdwBham_GeomW4P1_F_c;
 +nb_kernel_t nb_kernel_ElecEw_VdwBham_GeomW4W4_VF_c;
 +nb_kernel_t nb_kernel_ElecEw_VdwBham_GeomW4W4_F_c;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_VF_c; /* ElecEwSh_*: kernellist_c in this file pairs these with "Ewald", "PotentialShift"; the Sh-suffixed Vdw tags (LJSh, BhamSh) likewise carry "PotentialShift" after their "LennardJones"/"Buckingham" string. */
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_F_c;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_VF_c;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_F_c;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_VF_c;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_F_c;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_VF_c;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_F_c;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_VF_c;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_F_c;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomP1P1_VF_c;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomP1P1_F_c;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW3P1_VF_c;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW3P1_F_c;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW3W3_VF_c;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW3W3_F_c;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW4P1_VF_c;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW4P1_F_c;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW4W4_VF_c;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW4W4_F_c;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwBhamSh_GeomP1P1_VF_c;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwBhamSh_GeomP1P1_F_c;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwBhamSh_GeomW3P1_VF_c;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwBhamSh_GeomW3P1_F_c;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwBhamSh_GeomW3W3_VF_c;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwBhamSh_GeomW3W3_F_c;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwBhamSh_GeomW4P1_VF_c;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwBhamSh_GeomW4P1_F_c;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwBhamSh_GeomW4W4_VF_c;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwBhamSh_GeomW4W4_F_c;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_VF_c; /* ElecEwSw_*: the kernellist_c rows for the LJSw kernels list "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch"; presumably the VdwNone/VdwBhamSw rows follow the same pattern -- confirm in the table. */
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_F_c;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_VF_c;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_F_c;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_VF_c;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_F_c;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_VF_c;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_F_c;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_VF_c;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_F_c;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomP1P1_VF_c;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomP1P1_F_c;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW3P1_VF_c;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW3P1_F_c;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW3W3_VF_c;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW3W3_F_c;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_c;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_c;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_c;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_c;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwBhamSw_GeomP1P1_VF_c;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwBhamSw_GeomP1P1_F_c;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwBhamSw_GeomW3P1_VF_c;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwBhamSw_GeomW3P1_F_c;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwBhamSw_GeomW3W3_VF_c;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwBhamSw_GeomW3W3_F_c;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwBhamSw_GeomW4P1_VF_c;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwBhamSw_GeomW4P1_F_c;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwBhamSw_GeomW4W4_VF_c;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwBhamSw_GeomW4W4_F_c;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_c; /* ElecCoul_*: presumably plain Coulomb electrostatics -- confirm against the matching kernellist_c rows in this file. */
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_c;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_c;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_c;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_c;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_c;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_c;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_c;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_c;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_c;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_c;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_c;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_c;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_c;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_c;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_c;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_c;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_c;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_c;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_c;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_c;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_c;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_c;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_c;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_c;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_c;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_c;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_c;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_c;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_c;
 +nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomP1P1_VF_c;
 +nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomP1P1_F_c;
 +nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomW3P1_VF_c;
 +nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomW3P1_F_c;
 +nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomW3W3_VF_c;
 +nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomW3W3_F_c;
 +nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomW4P1_VF_c;
 +nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomW4P1_F_c;
 +nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomW4W4_VF_c;
 +nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomW4W4_F_c;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_c; /* ElecCSTab_*: presumably cubic-spline-table electrostatics (the VdwCSTab rows of kernellist_c use "CubicSplineTable") -- confirm against the matching kernellist_c rows in this file. */
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_c;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_c;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_c;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_c;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_c;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_c;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_c;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_c;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_c;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_c;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_c;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_c;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_c;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_c;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_c;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_c;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_c;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_c;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_c;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_c;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_c;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_c;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_c;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_c;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_c;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_c;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_c;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_c;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_c;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomP1P1_VF_c;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomP1P1_F_c;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomW3P1_VF_c;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomW3P1_F_c;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomW3W3_VF_c;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomW3W3_F_c;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomW4P1_VF_c;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomW4P1_F_c;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomW4W4_VF_c;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomW4W4_F_c;
 +nb_kernel_t nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_c; /* ElecGB_*: only GeomP1P1 variants are declared for this group; presumably generalized Born electrostatics -- confirm against the matching kernellist_c rows in this file. */
 +nb_kernel_t nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_c;
 +nb_kernel_t nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_c;
 +nb_kernel_t nb_kernel_ElecGB_VdwNone_GeomP1P1_F_c;
 +nb_kernel_t nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_c;
 +nb_kernel_t nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_c;
 +nb_kernel_t nb_kernel_ElecGB_VdwBham_GeomP1P1_VF_c;
 +nb_kernel_t nb_kernel_ElecGB_VdwBham_GeomP1P1_F_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_c; /* ElecRFCut_*: presumably reaction-field electrostatics with a cutoff -- confirm against the matching kernellist_c rows in this file. */
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_F_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_VF_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_F_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_VF_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_F_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_VF_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_F_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_VF_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_F_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_VF_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_F_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_VF_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_F_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_VF_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_F_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomP1P1_VF_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomP1P1_F_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW3P1_VF_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW3P1_F_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW3W3_VF_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW3W3_F_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW4P1_VF_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW4P1_F_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW4W4_VF_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW4W4_F_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_VF_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_F_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_VF_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_F_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_VF_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_F_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_VF_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_F_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_VF_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_F_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSh_GeomP1P1_VF_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSh_GeomP1P1_F_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSh_GeomW3P1_VF_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSh_GeomW3P1_F_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSh_GeomW3W3_VF_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSh_GeomW3W3_F_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSh_GeomW4P1_VF_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSh_GeomW4P1_F_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSh_GeomW4W4_VF_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSh_GeomW4W4_F_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSw_GeomP1P1_VF_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSw_GeomP1P1_F_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSw_GeomW3P1_VF_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSw_GeomW3P1_F_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSw_GeomW3W3_VF_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSw_GeomW3W3_F_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSw_GeomW4P1_VF_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSw_GeomW4P1_F_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSw_GeomW4W4_VF_c;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSw_GeomW4W4_F_c;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomP1P1_VF_c; /* ElecRF_*: presumably reaction-field electrostatics without the cutoff modifier of the ElecRFCut_* kernels -- confirm against the matching kernellist_c rows in this file. */
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomP1P1_F_c;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW3P1_VF_c;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW3P1_F_c;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW3W3_VF_c;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW3W3_F_c;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW4P1_VF_c;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW4P1_F_c;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW4W4_VF_c;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW4W4_F_c;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomP1P1_VF_c;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomP1P1_F_c;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW3P1_VF_c;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW3P1_F_c;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_c;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW3W3_F_c;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW4P1_VF_c;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW4P1_F_c;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW4W4_VF_c;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW4W4_F_c;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomP1P1_VF_c;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomP1P1_F_c;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW3P1_VF_c;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW3P1_F_c;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW3W3_VF_c;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW3W3_F_c;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_c;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_c;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_c;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_c;
 +nb_kernel_t nb_kernel_ElecRF_VdwBham_GeomP1P1_VF_c;
 +nb_kernel_t nb_kernel_ElecRF_VdwBham_GeomP1P1_F_c;
 +nb_kernel_t nb_kernel_ElecRF_VdwBham_GeomW3P1_VF_c;
 +nb_kernel_t nb_kernel_ElecRF_VdwBham_GeomW3P1_F_c;
 +nb_kernel_t nb_kernel_ElecRF_VdwBham_GeomW3W3_VF_c;
 +nb_kernel_t nb_kernel_ElecRF_VdwBham_GeomW3W3_F_c;
 +nb_kernel_t nb_kernel_ElecRF_VdwBham_GeomW4P1_VF_c;
 +nb_kernel_t nb_kernel_ElecRF_VdwBham_GeomW4P1_F_c;
 +nb_kernel_t nb_kernel_ElecRF_VdwBham_GeomW4W4_VF_c;
 +nb_kernel_t nb_kernel_ElecRF_VdwBham_GeomW4W4_F_c;
 +
 +
 +nb_kernel_info_t
-     kernellist_c_size = sizeof(kernellist_c)/sizeof(kernellist_c[0]);
++kernellist_c[] =
 +{
 +    { nb_kernel_ElecNone_VdwLJ_GeomP1P1_VF_c, "nb_kernel_ElecNone_VdwLJ_GeomP1P1_VF_c", "c", "None", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecNone_VdwLJ_GeomP1P1_F_c, "nb_kernel_ElecNone_VdwLJ_GeomP1P1_F_c", "c", "None", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecNone_VdwLJSh_GeomP1P1_VF_c, "nb_kernel_ElecNone_VdwLJSh_GeomP1P1_VF_c", "c", "None", "None", "LennardJones", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecNone_VdwLJSh_GeomP1P1_F_c, "nb_kernel_ElecNone_VdwLJSh_GeomP1P1_F_c", "c", "None", "None", "LennardJones", "PotentialShift", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_c, "nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_c", "c", "None", "None", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_c, "nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_c", "c", "None", "None", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_c, "nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_c", "c", "None", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_c, "nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_c", "c", "None", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecNone_VdwBham_GeomP1P1_VF_c, "nb_kernel_ElecNone_VdwBham_GeomP1P1_VF_c", "c", "None", "None", "Buckingham", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecNone_VdwBham_GeomP1P1_F_c, "nb_kernel_ElecNone_VdwBham_GeomP1P1_F_c", "c", "None", "None", "Buckingham", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecNone_VdwBhamSh_GeomP1P1_VF_c, "nb_kernel_ElecNone_VdwBhamSh_GeomP1P1_VF_c", "c", "None", "None", "Buckingham", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecNone_VdwBhamSh_GeomP1P1_F_c, "nb_kernel_ElecNone_VdwBhamSh_GeomP1P1_F_c", "c", "None", "None", "Buckingham", "PotentialShift", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecNone_VdwBhamSw_GeomP1P1_VF_c, "nb_kernel_ElecNone_VdwBhamSw_GeomP1P1_VF_c", "c", "None", "None", "Buckingham", "PotentialSwitch", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecNone_VdwBhamSw_GeomP1P1_F_c, "nb_kernel_ElecNone_VdwBhamSw_GeomP1P1_F_c", "c", "None", "None", "Buckingham", "PotentialSwitch", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomP1P1_VF_c, "nb_kernel_ElecEw_VdwLJ_GeomP1P1_VF_c", "c", "Ewald", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomP1P1_F_c, "nb_kernel_ElecEw_VdwLJ_GeomP1P1_F_c", "c", "Ewald", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW3P1_VF_c, "nb_kernel_ElecEw_VdwLJ_GeomW3P1_VF_c", "c", "Ewald", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW3P1_F_c, "nb_kernel_ElecEw_VdwLJ_GeomW3P1_F_c", "c", "Ewald", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_c, "nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_c", "c", "Ewald", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW3W3_F_c, "nb_kernel_ElecEw_VdwLJ_GeomW3W3_F_c", "c", "Ewald", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW4P1_VF_c, "nb_kernel_ElecEw_VdwLJ_GeomW4P1_VF_c", "c", "Ewald", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW4P1_F_c, "nb_kernel_ElecEw_VdwLJ_GeomW4P1_F_c", "c", "Ewald", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW4W4_VF_c, "nb_kernel_ElecEw_VdwLJ_GeomW4W4_VF_c", "c", "Ewald", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW4W4_F_c, "nb_kernel_ElecEw_VdwLJ_GeomW4W4_F_c", "c", "Ewald", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecEw_VdwNone_GeomP1P1_VF_c, "nb_kernel_ElecEw_VdwNone_GeomP1P1_VF_c", "c", "Ewald", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwNone_GeomP1P1_F_c, "nb_kernel_ElecEw_VdwNone_GeomP1P1_F_c", "c", "Ewald", "None", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW3P1_VF_c, "nb_kernel_ElecEw_VdwNone_GeomW3P1_VF_c", "c", "Ewald", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW3P1_F_c, "nb_kernel_ElecEw_VdwNone_GeomW3P1_F_c", "c", "Ewald", "None", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW3W3_VF_c, "nb_kernel_ElecEw_VdwNone_GeomW3W3_VF_c", "c", "Ewald", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW3W3_F_c, "nb_kernel_ElecEw_VdwNone_GeomW3W3_F_c", "c", "Ewald", "None", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW4P1_VF_c, "nb_kernel_ElecEw_VdwNone_GeomW4P1_VF_c", "c", "Ewald", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW4P1_F_c, "nb_kernel_ElecEw_VdwNone_GeomW4P1_F_c", "c", "Ewald", "None", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW4W4_VF_c, "nb_kernel_ElecEw_VdwNone_GeomW4W4_VF_c", "c", "Ewald", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW4W4_F_c, "nb_kernel_ElecEw_VdwNone_GeomW4W4_F_c", "c", "Ewald", "None", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomP1P1_VF_c, "nb_kernel_ElecEw_VdwCSTab_GeomP1P1_VF_c", "c", "Ewald", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomP1P1_F_c, "nb_kernel_ElecEw_VdwCSTab_GeomP1P1_F_c", "c", "Ewald", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW3P1_VF_c, "nb_kernel_ElecEw_VdwCSTab_GeomW3P1_VF_c", "c", "Ewald", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW3P1_F_c, "nb_kernel_ElecEw_VdwCSTab_GeomW3P1_F_c", "c", "Ewald", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW3W3_VF_c, "nb_kernel_ElecEw_VdwCSTab_GeomW3W3_VF_c", "c", "Ewald", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW3W3_F_c, "nb_kernel_ElecEw_VdwCSTab_GeomW3W3_F_c", "c", "Ewald", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW4P1_VF_c, "nb_kernel_ElecEw_VdwCSTab_GeomW4P1_VF_c", "c", "Ewald", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW4P1_F_c, "nb_kernel_ElecEw_VdwCSTab_GeomW4P1_F_c", "c", "Ewald", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW4W4_VF_c, "nb_kernel_ElecEw_VdwCSTab_GeomW4W4_VF_c", "c", "Ewald", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW4W4_F_c, "nb_kernel_ElecEw_VdwCSTab_GeomW4W4_F_c", "c", "Ewald", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecEw_VdwBham_GeomP1P1_VF_c, "nb_kernel_ElecEw_VdwBham_GeomP1P1_VF_c", "c", "Ewald", "None", "Buckingham", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwBham_GeomP1P1_F_c, "nb_kernel_ElecEw_VdwBham_GeomP1P1_F_c", "c", "Ewald", "None", "Buckingham", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwBham_GeomW3P1_VF_c, "nb_kernel_ElecEw_VdwBham_GeomW3P1_VF_c", "c", "Ewald", "None", "Buckingham", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwBham_GeomW3P1_F_c, "nb_kernel_ElecEw_VdwBham_GeomW3P1_F_c", "c", "Ewald", "None", "Buckingham", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwBham_GeomW3W3_VF_c, "nb_kernel_ElecEw_VdwBham_GeomW3W3_VF_c", "c", "Ewald", "None", "Buckingham", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwBham_GeomW3W3_F_c, "nb_kernel_ElecEw_VdwBham_GeomW3W3_F_c", "c", "Ewald", "None", "Buckingham", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEw_VdwBham_GeomW4P1_VF_c, "nb_kernel_ElecEw_VdwBham_GeomW4P1_VF_c", "c", "Ewald", "None", "Buckingham", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwBham_GeomW4P1_F_c, "nb_kernel_ElecEw_VdwBham_GeomW4P1_F_c", "c", "Ewald", "None", "Buckingham", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwBham_GeomW4W4_VF_c, "nb_kernel_ElecEw_VdwBham_GeomW4W4_VF_c", "c", "Ewald", "None", "Buckingham", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwBham_GeomW4W4_F_c, "nb_kernel_ElecEw_VdwBham_GeomW4W4_F_c", "c", "Ewald", "None", "Buckingham", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_VF_c, "nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_VF_c", "c", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_F_c, "nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_F_c", "c", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_VF_c, "nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_VF_c", "c", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_F_c, "nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_F_c", "c", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_VF_c, "nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_VF_c", "c", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_F_c, "nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_F_c", "c", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_VF_c, "nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_VF_c", "c", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_F_c, "nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_F_c", "c", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_VF_c, "nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_VF_c", "c", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_F_c, "nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_F_c", "c", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomP1P1_VF_c, "nb_kernel_ElecEwSh_VdwNone_GeomP1P1_VF_c", "c", "Ewald", "PotentialShift", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomP1P1_F_c, "nb_kernel_ElecEwSh_VdwNone_GeomP1P1_F_c", "c", "Ewald", "PotentialShift", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW3P1_VF_c, "nb_kernel_ElecEwSh_VdwNone_GeomW3P1_VF_c", "c", "Ewald", "PotentialShift", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW3P1_F_c, "nb_kernel_ElecEwSh_VdwNone_GeomW3P1_F_c", "c", "Ewald", "PotentialShift", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW3W3_VF_c, "nb_kernel_ElecEwSh_VdwNone_GeomW3W3_VF_c", "c", "Ewald", "PotentialShift", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW3W3_F_c, "nb_kernel_ElecEwSh_VdwNone_GeomW3W3_F_c", "c", "Ewald", "PotentialShift", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW4P1_VF_c, "nb_kernel_ElecEwSh_VdwNone_GeomW4P1_VF_c", "c", "Ewald", "PotentialShift", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW4P1_F_c, "nb_kernel_ElecEwSh_VdwNone_GeomW4P1_F_c", "c", "Ewald", "PotentialShift", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW4W4_VF_c, "nb_kernel_ElecEwSh_VdwNone_GeomW4W4_VF_c", "c", "Ewald", "PotentialShift", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW4W4_F_c, "nb_kernel_ElecEwSh_VdwNone_GeomW4W4_F_c", "c", "Ewald", "PotentialShift", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwBhamSh_GeomP1P1_VF_c, "nb_kernel_ElecEwSh_VdwBhamSh_GeomP1P1_VF_c", "c", "Ewald", "PotentialShift", "Buckingham", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwBhamSh_GeomP1P1_F_c, "nb_kernel_ElecEwSh_VdwBhamSh_GeomP1P1_F_c", "c", "Ewald", "PotentialShift", "Buckingham", "PotentialShift", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwBhamSh_GeomW3P1_VF_c, "nb_kernel_ElecEwSh_VdwBhamSh_GeomW3P1_VF_c", "c", "Ewald", "PotentialShift", "Buckingham", "PotentialShift", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwBhamSh_GeomW3P1_F_c, "nb_kernel_ElecEwSh_VdwBhamSh_GeomW3P1_F_c", "c", "Ewald", "PotentialShift", "Buckingham", "PotentialShift", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwBhamSh_GeomW3W3_VF_c, "nb_kernel_ElecEwSh_VdwBhamSh_GeomW3W3_VF_c", "c", "Ewald", "PotentialShift", "Buckingham", "PotentialShift", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwBhamSh_GeomW3W3_F_c, "nb_kernel_ElecEwSh_VdwBhamSh_GeomW3W3_F_c", "c", "Ewald", "PotentialShift", "Buckingham", "PotentialShift", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwBhamSh_GeomW4P1_VF_c, "nb_kernel_ElecEwSh_VdwBhamSh_GeomW4P1_VF_c", "c", "Ewald", "PotentialShift", "Buckingham", "PotentialShift", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwBhamSh_GeomW4P1_F_c, "nb_kernel_ElecEwSh_VdwBhamSh_GeomW4P1_F_c", "c", "Ewald", "PotentialShift", "Buckingham", "PotentialShift", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwBhamSh_GeomW4W4_VF_c, "nb_kernel_ElecEwSh_VdwBhamSh_GeomW4W4_VF_c", "c", "Ewald", "PotentialShift", "Buckingham", "PotentialShift", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwBhamSh_GeomW4W4_F_c, "nb_kernel_ElecEwSh_VdwBhamSh_GeomW4W4_F_c", "c", "Ewald", "PotentialShift", "Buckingham", "PotentialShift", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_VF_c, "nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_VF_c", "c", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_F_c, "nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_F_c", "c", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_VF_c, "nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_VF_c", "c", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_F_c, "nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_F_c", "c", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_VF_c, "nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_VF_c", "c", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_F_c, "nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_F_c", "c", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_VF_c, "nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_VF_c", "c", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_F_c, "nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_F_c", "c", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_VF_c, "nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_VF_c", "c", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_F_c, "nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_F_c", "c", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomP1P1_VF_c, "nb_kernel_ElecEwSw_VdwNone_GeomP1P1_VF_c", "c", "Ewald", "PotentialSwitch", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomP1P1_F_c, "nb_kernel_ElecEwSw_VdwNone_GeomP1P1_F_c", "c", "Ewald", "PotentialSwitch", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW3P1_VF_c, "nb_kernel_ElecEwSw_VdwNone_GeomW3P1_VF_c", "c", "Ewald", "PotentialSwitch", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW3P1_F_c, "nb_kernel_ElecEwSw_VdwNone_GeomW3P1_F_c", "c", "Ewald", "PotentialSwitch", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW3W3_VF_c, "nb_kernel_ElecEwSw_VdwNone_GeomW3W3_VF_c", "c", "Ewald", "PotentialSwitch", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW3W3_F_c, "nb_kernel_ElecEwSw_VdwNone_GeomW3W3_F_c", "c", "Ewald", "PotentialSwitch", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_c, "nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_c", "c", "Ewald", "PotentialSwitch", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_c, "nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_c", "c", "Ewald", "PotentialSwitch", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_c, "nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_c", "c", "Ewald", "PotentialSwitch", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_c, "nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_c", "c", "Ewald", "PotentialSwitch", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwBhamSw_GeomP1P1_VF_c, "nb_kernel_ElecEwSw_VdwBhamSw_GeomP1P1_VF_c", "c", "Ewald", "PotentialSwitch", "Buckingham", "PotentialSwitch", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwBhamSw_GeomP1P1_F_c, "nb_kernel_ElecEwSw_VdwBhamSw_GeomP1P1_F_c", "c", "Ewald", "PotentialSwitch", "Buckingham", "PotentialSwitch", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwBhamSw_GeomW3P1_VF_c, "nb_kernel_ElecEwSw_VdwBhamSw_GeomW3P1_VF_c", "c", "Ewald", "PotentialSwitch", "Buckingham", "PotentialSwitch", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwBhamSw_GeomW3P1_F_c, "nb_kernel_ElecEwSw_VdwBhamSw_GeomW3P1_F_c", "c", "Ewald", "PotentialSwitch", "Buckingham", "PotentialSwitch", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwBhamSw_GeomW3W3_VF_c, "nb_kernel_ElecEwSw_VdwBhamSw_GeomW3W3_VF_c", "c", "Ewald", "PotentialSwitch", "Buckingham", "PotentialSwitch", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwBhamSw_GeomW3W3_F_c, "nb_kernel_ElecEwSw_VdwBhamSw_GeomW3W3_F_c", "c", "Ewald", "PotentialSwitch", "Buckingham", "PotentialSwitch", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwBhamSw_GeomW4P1_VF_c, "nb_kernel_ElecEwSw_VdwBhamSw_GeomW4P1_VF_c", "c", "Ewald", "PotentialSwitch", "Buckingham", "PotentialSwitch", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwBhamSw_GeomW4P1_F_c, "nb_kernel_ElecEwSw_VdwBhamSw_GeomW4P1_F_c", "c", "Ewald", "PotentialSwitch", "Buckingham", "PotentialSwitch", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwBhamSw_GeomW4W4_VF_c, "nb_kernel_ElecEwSw_VdwBhamSw_GeomW4W4_VF_c", "c", "Ewald", "PotentialSwitch", "Buckingham", "PotentialSwitch", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwBhamSw_GeomW4W4_F_c, "nb_kernel_ElecEwSw_VdwBhamSw_GeomW4W4_F_c", "c", "Ewald", "PotentialSwitch", "Buckingham", "PotentialSwitch", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_c, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_c", "c", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_c, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_c", "c", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_c, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_c", "c", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_c, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_c", "c", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_c, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_c", "c", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_c, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_c", "c", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_c, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_c", "c", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_c, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_c", "c", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_c, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_c", "c", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_c, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_c", "c", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_c, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_c", "c", "Coulomb", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_c, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_c", "c", "Coulomb", "None", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_c, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_c", "c", "Coulomb", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_c, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_c", "c", "Coulomb", "None", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_c, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_c", "c", "Coulomb", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_c, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_c", "c", "Coulomb", "None", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_c, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_c", "c", "Coulomb", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_c, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_c", "c", "Coulomb", "None", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_c, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_c", "c", "Coulomb", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_c, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_c", "c", "Coulomb", "None", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_c, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_c, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_c, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_c, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_c, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_c, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_c, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_c, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_c, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_c, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwBham_GeomP1P1_VF_c, "nb_kernel_ElecCoul_VdwBham_GeomP1P1_VF_c", "c", "Coulomb", "None", "Buckingham", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwBham_GeomP1P1_F_c, "nb_kernel_ElecCoul_VdwBham_GeomP1P1_F_c", "c", "Coulomb", "None", "Buckingham", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwBham_GeomW3P1_VF_c, "nb_kernel_ElecCoul_VdwBham_GeomW3P1_VF_c", "c", "Coulomb", "None", "Buckingham", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwBham_GeomW3P1_F_c, "nb_kernel_ElecCoul_VdwBham_GeomW3P1_F_c", "c", "Coulomb", "None", "Buckingham", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwBham_GeomW3W3_VF_c, "nb_kernel_ElecCoul_VdwBham_GeomW3W3_VF_c", "c", "Coulomb", "None", "Buckingham", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwBham_GeomW3W3_F_c, "nb_kernel_ElecCoul_VdwBham_GeomW3W3_F_c", "c", "Coulomb", "None", "Buckingham", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwBham_GeomW4P1_VF_c, "nb_kernel_ElecCoul_VdwBham_GeomW4P1_VF_c", "c", "Coulomb", "None", "Buckingham", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwBham_GeomW4P1_F_c, "nb_kernel_ElecCoul_VdwBham_GeomW4P1_F_c", "c", "Coulomb", "None", "Buckingham", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwBham_GeomW4W4_VF_c, "nb_kernel_ElecCoul_VdwBham_GeomW4W4_VF_c", "c", "Coulomb", "None", "Buckingham", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwBham_GeomW4W4_F_c, "nb_kernel_ElecCoul_VdwBham_GeomW4W4_F_c", "c", "Coulomb", "None", "Buckingham", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_c, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_c, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_c, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_c, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_c, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_c, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_c, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_c, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_c, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_c, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_c, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_c", "c", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_c, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_c", "c", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_c, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_c", "c", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_c, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_c", "c", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_c, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_c", "c", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_c, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_c", "c", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_c, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_c", "c", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_c, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_c", "c", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_c, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_c", "c", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_c, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_c", "c", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwBham_GeomP1P1_VF_c, "nb_kernel_ElecCSTab_VdwBham_GeomP1P1_VF_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwBham_GeomP1P1_F_c, "nb_kernel_ElecCSTab_VdwBham_GeomP1P1_F_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwBham_GeomW3P1_VF_c, "nb_kernel_ElecCSTab_VdwBham_GeomW3P1_VF_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwBham_GeomW3P1_F_c, "nb_kernel_ElecCSTab_VdwBham_GeomW3P1_F_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwBham_GeomW3W3_VF_c, "nb_kernel_ElecCSTab_VdwBham_GeomW3W3_VF_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwBham_GeomW3W3_F_c, "nb_kernel_ElecCSTab_VdwBham_GeomW3W3_F_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwBham_GeomW4P1_VF_c, "nb_kernel_ElecCSTab_VdwBham_GeomW4P1_VF_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwBham_GeomW4P1_F_c, "nb_kernel_ElecCSTab_VdwBham_GeomW4P1_F_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwBham_GeomW4W4_VF_c, "nb_kernel_ElecCSTab_VdwBham_GeomW4W4_VF_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwBham_GeomW4W4_F_c, "nb_kernel_ElecCSTab_VdwBham_GeomW4W4_F_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_c, "nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_c", "c", "GeneralizedBorn", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_c, "nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_c", "c", "GeneralizedBorn", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_c, "nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_c", "c", "GeneralizedBorn", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecGB_VdwNone_GeomP1P1_F_c, "nb_kernel_ElecGB_VdwNone_GeomP1P1_F_c", "c", "GeneralizedBorn", "None", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_c, "nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_c", "c", "GeneralizedBorn", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_c, "nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_c", "c", "GeneralizedBorn", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecGB_VdwBham_GeomP1P1_VF_c, "nb_kernel_ElecGB_VdwBham_GeomP1P1_VF_c", "c", "GeneralizedBorn", "None", "Buckingham", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecGB_VdwBham_GeomP1P1_F_c, "nb_kernel_ElecGB_VdwBham_GeomP1P1_F_c", "c", "GeneralizedBorn", "None", "Buckingham", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_c, "nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_c, "nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_c, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_F_c, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_F_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_VF_c, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_VF_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_F_c, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_F_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_VF_c, "nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_VF_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_F_c, "nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_F_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_c, "nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_c, "nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_VF_c, "nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_VF_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_F_c, "nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_F_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_VF_c, "nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_VF_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_F_c, "nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_F_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_VF_c, "nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_VF_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_F_c, "nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_F_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_VF_c, "nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_VF_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_F_c, "nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_F_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_VF_c, "nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_VF_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_F_c, "nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_F_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomP1P1_VF_c, "nb_kernel_ElecRFCut_VdwNone_GeomP1P1_VF_c", "c", "ReactionField", "ExactCutoff", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomP1P1_F_c, "nb_kernel_ElecRFCut_VdwNone_GeomP1P1_F_c", "c", "ReactionField", "ExactCutoff", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW3P1_VF_c, "nb_kernel_ElecRFCut_VdwNone_GeomW3P1_VF_c", "c", "ReactionField", "ExactCutoff", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW3P1_F_c, "nb_kernel_ElecRFCut_VdwNone_GeomW3P1_F_c", "c", "ReactionField", "ExactCutoff", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW3W3_VF_c, "nb_kernel_ElecRFCut_VdwNone_GeomW3W3_VF_c", "c", "ReactionField", "ExactCutoff", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW3W3_F_c, "nb_kernel_ElecRFCut_VdwNone_GeomW3W3_F_c", "c", "ReactionField", "ExactCutoff", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW4P1_VF_c, "nb_kernel_ElecRFCut_VdwNone_GeomW4P1_VF_c", "c", "ReactionField", "ExactCutoff", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW4P1_F_c, "nb_kernel_ElecRFCut_VdwNone_GeomW4P1_F_c", "c", "ReactionField", "ExactCutoff", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW4W4_VF_c, "nb_kernel_ElecRFCut_VdwNone_GeomW4W4_VF_c", "c", "ReactionField", "ExactCutoff", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW4W4_F_c, "nb_kernel_ElecRFCut_VdwNone_GeomW4W4_F_c", "c", "ReactionField", "ExactCutoff", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_VF_c, "nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_VF_c", "c", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_F_c, "nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_F_c", "c", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_VF_c, "nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_VF_c", "c", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_F_c, "nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_F_c", "c", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_VF_c, "nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_VF_c", "c", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_F_c, "nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_F_c", "c", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_VF_c, "nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_VF_c", "c", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_F_c, "nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_F_c", "c", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_VF_c, "nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_VF_c", "c", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_F_c, "nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_F_c", "c", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwBhamSh_GeomP1P1_VF_c, "nb_kernel_ElecRFCut_VdwBhamSh_GeomP1P1_VF_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwBhamSh_GeomP1P1_F_c, "nb_kernel_ElecRFCut_VdwBhamSh_GeomP1P1_F_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialShift", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwBhamSh_GeomW3P1_VF_c, "nb_kernel_ElecRFCut_VdwBhamSh_GeomW3P1_VF_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialShift", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwBhamSh_GeomW3P1_F_c, "nb_kernel_ElecRFCut_VdwBhamSh_GeomW3P1_F_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialShift", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwBhamSh_GeomW3W3_VF_c, "nb_kernel_ElecRFCut_VdwBhamSh_GeomW3W3_VF_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialShift", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwBhamSh_GeomW3W3_F_c, "nb_kernel_ElecRFCut_VdwBhamSh_GeomW3W3_F_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialShift", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwBhamSh_GeomW4P1_VF_c, "nb_kernel_ElecRFCut_VdwBhamSh_GeomW4P1_VF_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialShift", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwBhamSh_GeomW4P1_F_c, "nb_kernel_ElecRFCut_VdwBhamSh_GeomW4P1_F_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialShift", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwBhamSh_GeomW4W4_VF_c, "nb_kernel_ElecRFCut_VdwBhamSh_GeomW4W4_VF_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialShift", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwBhamSh_GeomW4W4_F_c, "nb_kernel_ElecRFCut_VdwBhamSh_GeomW4W4_F_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialShift", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwBhamSw_GeomP1P1_VF_c, "nb_kernel_ElecRFCut_VdwBhamSw_GeomP1P1_VF_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialSwitch", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwBhamSw_GeomP1P1_F_c, "nb_kernel_ElecRFCut_VdwBhamSw_GeomP1P1_F_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialSwitch", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwBhamSw_GeomW3P1_VF_c, "nb_kernel_ElecRFCut_VdwBhamSw_GeomW3P1_VF_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialSwitch", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwBhamSw_GeomW3P1_F_c, "nb_kernel_ElecRFCut_VdwBhamSw_GeomW3P1_F_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialSwitch", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwBhamSw_GeomW3W3_VF_c, "nb_kernel_ElecRFCut_VdwBhamSw_GeomW3W3_VF_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialSwitch", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwBhamSw_GeomW3W3_F_c, "nb_kernel_ElecRFCut_VdwBhamSw_GeomW3W3_F_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialSwitch", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwBhamSw_GeomW4P1_VF_c, "nb_kernel_ElecRFCut_VdwBhamSw_GeomW4P1_VF_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialSwitch", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwBhamSw_GeomW4P1_F_c, "nb_kernel_ElecRFCut_VdwBhamSw_GeomW4P1_F_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialSwitch", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwBhamSw_GeomW4W4_VF_c, "nb_kernel_ElecRFCut_VdwBhamSw_GeomW4W4_VF_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialSwitch", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwBhamSw_GeomW4W4_F_c, "nb_kernel_ElecRFCut_VdwBhamSw_GeomW4W4_F_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialSwitch", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomP1P1_VF_c, "nb_kernel_ElecRF_VdwLJ_GeomP1P1_VF_c", "c", "ReactionField", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomP1P1_F_c, "nb_kernel_ElecRF_VdwLJ_GeomP1P1_F_c", "c", "ReactionField", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW3P1_VF_c, "nb_kernel_ElecRF_VdwLJ_GeomW3P1_VF_c", "c", "ReactionField", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW3P1_F_c, "nb_kernel_ElecRF_VdwLJ_GeomW3P1_F_c", "c", "ReactionField", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW3W3_VF_c, "nb_kernel_ElecRF_VdwLJ_GeomW3W3_VF_c", "c", "ReactionField", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW3W3_F_c, "nb_kernel_ElecRF_VdwLJ_GeomW3W3_F_c", "c", "ReactionField", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW4P1_VF_c, "nb_kernel_ElecRF_VdwLJ_GeomW4P1_VF_c", "c", "ReactionField", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW4P1_F_c, "nb_kernel_ElecRF_VdwLJ_GeomW4P1_F_c", "c", "ReactionField", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW4W4_VF_c, "nb_kernel_ElecRF_VdwLJ_GeomW4W4_VF_c", "c", "ReactionField", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW4W4_F_c, "nb_kernel_ElecRF_VdwLJ_GeomW4W4_F_c", "c", "ReactionField", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecRF_VdwNone_GeomP1P1_VF_c, "nb_kernel_ElecRF_VdwNone_GeomP1P1_VF_c", "c", "ReactionField", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwNone_GeomP1P1_F_c, "nb_kernel_ElecRF_VdwNone_GeomP1P1_F_c", "c", "ReactionField", "None", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW3P1_VF_c, "nb_kernel_ElecRF_VdwNone_GeomW3P1_VF_c", "c", "ReactionField", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW3P1_F_c, "nb_kernel_ElecRF_VdwNone_GeomW3P1_F_c", "c", "ReactionField", "None", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_c, "nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_c", "c", "ReactionField", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW3W3_F_c, "nb_kernel_ElecRF_VdwNone_GeomW3W3_F_c", "c", "ReactionField", "None", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW4P1_VF_c, "nb_kernel_ElecRF_VdwNone_GeomW4P1_VF_c", "c", "ReactionField", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW4P1_F_c, "nb_kernel_ElecRF_VdwNone_GeomW4P1_F_c", "c", "ReactionField", "None", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW4W4_VF_c, "nb_kernel_ElecRF_VdwNone_GeomW4W4_VF_c", "c", "ReactionField", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW4W4_F_c, "nb_kernel_ElecRF_VdwNone_GeomW4W4_F_c", "c", "ReactionField", "None", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomP1P1_VF_c, "nb_kernel_ElecRF_VdwCSTab_GeomP1P1_VF_c", "c", "ReactionField", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomP1P1_F_c, "nb_kernel_ElecRF_VdwCSTab_GeomP1P1_F_c", "c", "ReactionField", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW3P1_VF_c, "nb_kernel_ElecRF_VdwCSTab_GeomW3P1_VF_c", "c", "ReactionField", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW3P1_F_c, "nb_kernel_ElecRF_VdwCSTab_GeomW3P1_F_c", "c", "ReactionField", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW3W3_VF_c, "nb_kernel_ElecRF_VdwCSTab_GeomW3W3_VF_c", "c", "ReactionField", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW3W3_F_c, "nb_kernel_ElecRF_VdwCSTab_GeomW3W3_F_c", "c", "ReactionField", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_c, "nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_c", "c", "ReactionField", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_c, "nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_c", "c", "ReactionField", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_c, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_c", "c", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_c, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_c", "c", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecRF_VdwBham_GeomP1P1_VF_c, "nb_kernel_ElecRF_VdwBham_GeomP1P1_VF_c", "c", "ReactionField", "None", "Buckingham", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwBham_GeomP1P1_F_c, "nb_kernel_ElecRF_VdwBham_GeomP1P1_F_c", "c", "ReactionField", "None", "Buckingham", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwBham_GeomW3P1_VF_c, "nb_kernel_ElecRF_VdwBham_GeomW3P1_VF_c", "c", "ReactionField", "None", "Buckingham", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwBham_GeomW3P1_F_c, "nb_kernel_ElecRF_VdwBham_GeomW3P1_F_c", "c", "ReactionField", "None", "Buckingham", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwBham_GeomW3W3_VF_c, "nb_kernel_ElecRF_VdwBham_GeomW3W3_VF_c", "c", "ReactionField", "None", "Buckingham", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwBham_GeomW3W3_F_c, "nb_kernel_ElecRF_VdwBham_GeomW3W3_F_c", "c", "ReactionField", "None", "Buckingham", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRF_VdwBham_GeomW4P1_VF_c, "nb_kernel_ElecRF_VdwBham_GeomW4P1_VF_c", "c", "ReactionField", "None", "Buckingham", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwBham_GeomW4P1_F_c, "nb_kernel_ElecRF_VdwBham_GeomW4P1_F_c", "c", "ReactionField", "None", "Buckingham", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwBham_GeomW4W4_VF_c, "nb_kernel_ElecRF_VdwBham_GeomW4W4_VF_c", "c", "ReactionField", "None", "Buckingham", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwBham_GeomW4W4_F_c, "nb_kernel_ElecRF_VdwBham_GeomW4W4_F_c", "c", "ReactionField", "None", "Buckingham", "None", "Water4Water4", "", "Force" }
 +};
 +
 +int
++kernellist_c_size = sizeof(kernellist_c)/sizeof(kernellist_c[0]); /* Number of entries in kernellist_c: whole-array size divided by the size of one element, so it tracks the initializer automatically. */
 +
 +#endif
index bc3dd87a783d7f0c58c1ddce8957d12a7100fbdd,0000000000000000000000000000000000000000..95a7ef2f7c29df3bd6fc166142d4872fa41ad9bc
mode 100644,000000..100644
--- /dev/null
@@@ -1,465 -1,0 +1,465 @@@
-     kernellist_sse2_double[] =
 +/*
 + * Note: this file was generated by the Gromacs sse2_double kernel generator.
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + * Copyright (c) 2001-2012, The GROMACS Development Team
 + *
 + * Gromacs is a library for molecular simulation and trajectory analysis,
 + * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
 + * a full list of developers and information, check out http://www.gromacs.org
 + *
 + * This program is free software; you can redistribute it and/or modify it under
 + * the terms of the GNU Lesser General Public License as published by the Free
 + * Software Foundation; either version 2 of the License, or (at your option) any
 + * later version.
 + *
 + * To help fund GROMACS development, we humbly ask that you cite
 + * the papers people have written on it - you can find them on the website.
 + */
 +#ifndef nb_kernel_sse2_double_h
 +#define nb_kernel_sse2_double_h
 +
 +#include "../nb_kernel.h"
 +
 +nb_kernel_t nb_kernel_ElecNone_VdwLJ_GeomP1P1_VF_sse2_double; /* ElecNone group (P1P1 geometry only). In kernellist_sse2_double the _VF suffix is paired with "PotentialAndForce" and _F with "Force". nb_kernel_t comes from ../nb_kernel.h - presumably a function type; TODO confirm. */
 +nb_kernel_t nb_kernel_ElecNone_VdwLJ_GeomP1P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecNone_VdwLJSh_GeomP1P1_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecNone_VdwLJSh_GeomP1P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomP1P1_VF_sse2_double; /* ElecEw group: paired with the strings "Ewald", "None" in kernellist_sse2_double. Geometry tags map there to P1P1="ParticleParticle", W3P1="Water3Particle", W3W3="Water3Water3", W4P1="Water4Particle", W4W4="Water4Water4". */
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomP1P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW3P1_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW3P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW3W3_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW4P1_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW4P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW4W4_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW4W4_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomP1P1_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomP1P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW3P1_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW3P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW3W3_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW3W3_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW4P1_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW4P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW4W4_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW4W4_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomP1P1_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomP1P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW3P1_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW3P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW3W3_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW3W3_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW4P1_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW4P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW4W4_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW4W4_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_VF_sse2_double; /* ElecEwSh group: paired with "Ewald", "PotentialShift" in kernellist_sse2_double; declared for VdwLJSh and VdwNone across the five geometries. */
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomP1P1_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomP1P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW3P1_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW3P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW3W3_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW3W3_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW4P1_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW4P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW4W4_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW4W4_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_VF_sse2_double; /* ElecEwSw group: declared for VdwLJSw and VdwNone across the five geometries, each as a _VF/_F pair. By analogy with the Sh rows, Sw presumably means "PotentialSwitch" - confirm against the table entries. */
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomP1P1_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomP1P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW3P1_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW3P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW3W3_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW3W3_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sse2_double; /* ElecCoul group: declared for VdwLJ, VdwNone and VdwCSTab across the five geometries, each as a _VF/_F pair. */
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sse2_double; /* ElecCSTab group: declared for VdwLJ, VdwNone and VdwCSTab across the five geometries. The CSTab tag is paired with "CubicSplineTable" in the Vdw rows of kernellist_sse2_double; presumably the same here - confirm. */
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_sse2_double; /* ElecGB group: only the P1P1 geometry is declared, for VdwLJ, VdwNone and VdwCSTab; no water-geometry variants exist in this list. */
 +nb_kernel_t nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecGB_VdwNone_GeomP1P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_sse2_double; /* ElecRFCut group: declared for VdwLJSh, VdwLJSw, VdwNone and VdwCSTab across the five geometries. The plain-C table pairs ElecRFCut with "ReactionField", "ExactCutoff"; presumably the same here - confirm. */
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomP1P1_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomP1P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW3P1_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW3P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW3W3_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW3W3_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW4P1_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW4P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW4W4_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW4W4_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomP1P1_VF_sse2_double; /* ElecRF group: declared for VdwLJ, VdwNone and VdwCSTab across the five geometries. Unlike the plain-C kernel list, no VdwBham variants are declared for sse2_double. */
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomP1P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW3P1_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW3P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW3W3_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW3W3_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW4P1_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW4P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW4W4_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW4W4_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomP1P1_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomP1P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW3P1_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW3P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW3W3_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW4P1_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW4P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW4W4_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW4W4_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomP1P1_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomP1P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW3P1_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW3P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW3W3_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW3W3_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_sse2_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_sse2_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sse2_double;
 +
 +
 +nb_kernel_info_t
-     kernellist_sse2_double_size = sizeof(kernellist_sse2_double)/sizeof(kernellist_sse2_double[0]);
++kernellist_sse2_double[] =
 +{
 +    { nb_kernel_ElecNone_VdwLJ_GeomP1P1_VF_sse2_double, "nb_kernel_ElecNone_VdwLJ_GeomP1P1_VF_sse2_double", "sse2_double", "None", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecNone_VdwLJ_GeomP1P1_F_sse2_double, "nb_kernel_ElecNone_VdwLJ_GeomP1P1_F_sse2_double", "sse2_double", "None", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecNone_VdwLJSh_GeomP1P1_VF_sse2_double, "nb_kernel_ElecNone_VdwLJSh_GeomP1P1_VF_sse2_double", "sse2_double", "None", "None", "LennardJones", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecNone_VdwLJSh_GeomP1P1_F_sse2_double, "nb_kernel_ElecNone_VdwLJSh_GeomP1P1_F_sse2_double", "sse2_double", "None", "None", "LennardJones", "PotentialShift", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_sse2_double, "nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_sse2_double", "sse2_double", "None", "None", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_sse2_double, "nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_sse2_double", "sse2_double", "None", "None", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_sse2_double, "nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_sse2_double", "sse2_double", "None", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_sse2_double, "nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_sse2_double", "sse2_double", "None", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomP1P1_VF_sse2_double, "nb_kernel_ElecEw_VdwLJ_GeomP1P1_VF_sse2_double", "sse2_double", "Ewald", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomP1P1_F_sse2_double, "nb_kernel_ElecEw_VdwLJ_GeomP1P1_F_sse2_double", "sse2_double", "Ewald", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW3P1_VF_sse2_double, "nb_kernel_ElecEw_VdwLJ_GeomW3P1_VF_sse2_double", "sse2_double", "Ewald", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW3P1_F_sse2_double, "nb_kernel_ElecEw_VdwLJ_GeomW3P1_F_sse2_double", "sse2_double", "Ewald", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_sse2_double, "nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_sse2_double", "sse2_double", "Ewald", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW3W3_F_sse2_double, "nb_kernel_ElecEw_VdwLJ_GeomW3W3_F_sse2_double", "sse2_double", "Ewald", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW4P1_VF_sse2_double, "nb_kernel_ElecEw_VdwLJ_GeomW4P1_VF_sse2_double", "sse2_double", "Ewald", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW4P1_F_sse2_double, "nb_kernel_ElecEw_VdwLJ_GeomW4P1_F_sse2_double", "sse2_double", "Ewald", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW4W4_VF_sse2_double, "nb_kernel_ElecEw_VdwLJ_GeomW4W4_VF_sse2_double", "sse2_double", "Ewald", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW4W4_F_sse2_double, "nb_kernel_ElecEw_VdwLJ_GeomW4W4_F_sse2_double", "sse2_double", "Ewald", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecEw_VdwNone_GeomP1P1_VF_sse2_double, "nb_kernel_ElecEw_VdwNone_GeomP1P1_VF_sse2_double", "sse2_double", "Ewald", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwNone_GeomP1P1_F_sse2_double, "nb_kernel_ElecEw_VdwNone_GeomP1P1_F_sse2_double", "sse2_double", "Ewald", "None", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW3P1_VF_sse2_double, "nb_kernel_ElecEw_VdwNone_GeomW3P1_VF_sse2_double", "sse2_double", "Ewald", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW3P1_F_sse2_double, "nb_kernel_ElecEw_VdwNone_GeomW3P1_F_sse2_double", "sse2_double", "Ewald", "None", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW3W3_VF_sse2_double, "nb_kernel_ElecEw_VdwNone_GeomW3W3_VF_sse2_double", "sse2_double", "Ewald", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW3W3_F_sse2_double, "nb_kernel_ElecEw_VdwNone_GeomW3W3_F_sse2_double", "sse2_double", "Ewald", "None", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW4P1_VF_sse2_double, "nb_kernel_ElecEw_VdwNone_GeomW4P1_VF_sse2_double", "sse2_double", "Ewald", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW4P1_F_sse2_double, "nb_kernel_ElecEw_VdwNone_GeomW4P1_F_sse2_double", "sse2_double", "Ewald", "None", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW4W4_VF_sse2_double, "nb_kernel_ElecEw_VdwNone_GeomW4W4_VF_sse2_double", "sse2_double", "Ewald", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW4W4_F_sse2_double, "nb_kernel_ElecEw_VdwNone_GeomW4W4_F_sse2_double", "sse2_double", "Ewald", "None", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomP1P1_VF_sse2_double, "nb_kernel_ElecEw_VdwCSTab_GeomP1P1_VF_sse2_double", "sse2_double", "Ewald", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomP1P1_F_sse2_double, "nb_kernel_ElecEw_VdwCSTab_GeomP1P1_F_sse2_double", "sse2_double", "Ewald", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW3P1_VF_sse2_double, "nb_kernel_ElecEw_VdwCSTab_GeomW3P1_VF_sse2_double", "sse2_double", "Ewald", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW3P1_F_sse2_double, "nb_kernel_ElecEw_VdwCSTab_GeomW3P1_F_sse2_double", "sse2_double", "Ewald", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW3W3_VF_sse2_double, "nb_kernel_ElecEw_VdwCSTab_GeomW3W3_VF_sse2_double", "sse2_double", "Ewald", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW3W3_F_sse2_double, "nb_kernel_ElecEw_VdwCSTab_GeomW3W3_F_sse2_double", "sse2_double", "Ewald", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW4P1_VF_sse2_double, "nb_kernel_ElecEw_VdwCSTab_GeomW4P1_VF_sse2_double", "sse2_double", "Ewald", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW4P1_F_sse2_double, "nb_kernel_ElecEw_VdwCSTab_GeomW4P1_F_sse2_double", "sse2_double", "Ewald", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW4W4_VF_sse2_double, "nb_kernel_ElecEw_VdwCSTab_GeomW4W4_VF_sse2_double", "sse2_double", "Ewald", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW4W4_F_sse2_double, "nb_kernel_ElecEw_VdwCSTab_GeomW4W4_F_sse2_double", "sse2_double", "Ewald", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_VF_sse2_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_VF_sse2_double", "sse2_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_F_sse2_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_F_sse2_double", "sse2_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_VF_sse2_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_VF_sse2_double", "sse2_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_F_sse2_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_F_sse2_double", "sse2_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_VF_sse2_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_VF_sse2_double", "sse2_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_F_sse2_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_F_sse2_double", "sse2_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_VF_sse2_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_VF_sse2_double", "sse2_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_F_sse2_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_F_sse2_double", "sse2_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_VF_sse2_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_VF_sse2_double", "sse2_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_F_sse2_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_F_sse2_double", "sse2_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomP1P1_VF_sse2_double, "nb_kernel_ElecEwSh_VdwNone_GeomP1P1_VF_sse2_double", "sse2_double", "Ewald", "PotentialShift", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomP1P1_F_sse2_double, "nb_kernel_ElecEwSh_VdwNone_GeomP1P1_F_sse2_double", "sse2_double", "Ewald", "PotentialShift", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW3P1_VF_sse2_double, "nb_kernel_ElecEwSh_VdwNone_GeomW3P1_VF_sse2_double", "sse2_double", "Ewald", "PotentialShift", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW3P1_F_sse2_double, "nb_kernel_ElecEwSh_VdwNone_GeomW3P1_F_sse2_double", "sse2_double", "Ewald", "PotentialShift", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW3W3_VF_sse2_double, "nb_kernel_ElecEwSh_VdwNone_GeomW3W3_VF_sse2_double", "sse2_double", "Ewald", "PotentialShift", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW3W3_F_sse2_double, "nb_kernel_ElecEwSh_VdwNone_GeomW3W3_F_sse2_double", "sse2_double", "Ewald", "PotentialShift", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW4P1_VF_sse2_double, "nb_kernel_ElecEwSh_VdwNone_GeomW4P1_VF_sse2_double", "sse2_double", "Ewald", "PotentialShift", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW4P1_F_sse2_double, "nb_kernel_ElecEwSh_VdwNone_GeomW4P1_F_sse2_double", "sse2_double", "Ewald", "PotentialShift", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW4W4_VF_sse2_double, "nb_kernel_ElecEwSh_VdwNone_GeomW4W4_VF_sse2_double", "sse2_double", "Ewald", "PotentialShift", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW4W4_F_sse2_double, "nb_kernel_ElecEwSh_VdwNone_GeomW4W4_F_sse2_double", "sse2_double", "Ewald", "PotentialShift", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_VF_sse2_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_VF_sse2_double", "sse2_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_F_sse2_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_F_sse2_double", "sse2_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_VF_sse2_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_VF_sse2_double", "sse2_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_F_sse2_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_F_sse2_double", "sse2_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_VF_sse2_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_VF_sse2_double", "sse2_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_F_sse2_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_F_sse2_double", "sse2_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_VF_sse2_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_VF_sse2_double", "sse2_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_F_sse2_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_F_sse2_double", "sse2_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_VF_sse2_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_VF_sse2_double", "sse2_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_F_sse2_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_F_sse2_double", "sse2_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomP1P1_VF_sse2_double, "nb_kernel_ElecEwSw_VdwNone_GeomP1P1_VF_sse2_double", "sse2_double", "Ewald", "PotentialSwitch", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomP1P1_F_sse2_double, "nb_kernel_ElecEwSw_VdwNone_GeomP1P1_F_sse2_double", "sse2_double", "Ewald", "PotentialSwitch", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW3P1_VF_sse2_double, "nb_kernel_ElecEwSw_VdwNone_GeomW3P1_VF_sse2_double", "sse2_double", "Ewald", "PotentialSwitch", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW3P1_F_sse2_double, "nb_kernel_ElecEwSw_VdwNone_GeomW3P1_F_sse2_double", "sse2_double", "Ewald", "PotentialSwitch", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW3W3_VF_sse2_double, "nb_kernel_ElecEwSw_VdwNone_GeomW3W3_VF_sse2_double", "sse2_double", "Ewald", "PotentialSwitch", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW3W3_F_sse2_double, "nb_kernel_ElecEwSw_VdwNone_GeomW3W3_F_sse2_double", "sse2_double", "Ewald", "PotentialSwitch", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_sse2_double, "nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_sse2_double", "sse2_double", "Ewald", "PotentialSwitch", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_sse2_double, "nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_sse2_double", "sse2_double", "Ewald", "PotentialSwitch", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_sse2_double, "nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_sse2_double", "sse2_double", "Ewald", "PotentialSwitch", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_sse2_double, "nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_sse2_double", "sse2_double", "Ewald", "PotentialSwitch", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sse2_double, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sse2_double", "sse2_double", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sse2_double, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sse2_double", "sse2_double", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sse2_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sse2_double", "sse2_double", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sse2_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sse2_double", "sse2_double", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sse2_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sse2_double", "sse2_double", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sse2_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sse2_double", "sse2_double", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sse2_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sse2_double", "sse2_double", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sse2_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sse2_double", "sse2_double", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sse2_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sse2_double", "sse2_double", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sse2_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sse2_double", "sse2_double", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sse2_double, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sse2_double", "sse2_double", "Coulomb", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sse2_double, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sse2_double", "sse2_double", "Coulomb", "None", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sse2_double, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sse2_double", "sse2_double", "Coulomb", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sse2_double, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sse2_double", "sse2_double", "Coulomb", "None", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sse2_double, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sse2_double", "sse2_double", "Coulomb", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sse2_double, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sse2_double", "sse2_double", "Coulomb", "None", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sse2_double, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sse2_double", "sse2_double", "Coulomb", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sse2_double, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sse2_double", "sse2_double", "Coulomb", "None", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sse2_double, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sse2_double", "sse2_double", "Coulomb", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sse2_double, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sse2_double", "sse2_double", "Coulomb", "None", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sse2_double, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sse2_double", "sse2_double", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sse2_double, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sse2_double", "sse2_double", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sse2_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sse2_double", "sse2_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sse2_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sse2_double", "sse2_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sse2_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sse2_double", "sse2_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sse2_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sse2_double", "sse2_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sse2_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sse2_double", "sse2_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sse2_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sse2_double", "sse2_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sse2_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sse2_double", "sse2_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sse2_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sse2_double", "sse2_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sse2_double, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sse2_double", "sse2_double", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sse2_double, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sse2_double", "sse2_double", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sse2_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sse2_double", "sse2_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sse2_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sse2_double", "sse2_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sse2_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sse2_double", "sse2_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sse2_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sse2_double", "sse2_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sse2_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sse2_double", "sse2_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sse2_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sse2_double", "sse2_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sse2_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sse2_double", "sse2_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sse2_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sse2_double", "sse2_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sse2_double, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sse2_double", "sse2_double", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sse2_double, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sse2_double", "sse2_double", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sse2_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sse2_double", "sse2_double", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sse2_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sse2_double", "sse2_double", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sse2_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sse2_double", "sse2_double", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sse2_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sse2_double", "sse2_double", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sse2_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sse2_double", "sse2_double", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sse2_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sse2_double", "sse2_double", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sse2_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sse2_double", "sse2_double", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sse2_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sse2_double", "sse2_double", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sse2_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sse2_double", "sse2_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sse2_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sse2_double", "sse2_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sse2_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sse2_double", "sse2_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sse2_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sse2_double", "sse2_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sse2_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sse2_double", "sse2_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sse2_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sse2_double", "sse2_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sse2_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sse2_double", "sse2_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sse2_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sse2_double", "sse2_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sse2_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sse2_double", "sse2_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sse2_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sse2_double", "sse2_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_sse2_double, "nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_sse2_double", "sse2_double", "GeneralizedBorn", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_sse2_double, "nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_sse2_double", "sse2_double", "GeneralizedBorn", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_sse2_double, "nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_sse2_double", "sse2_double", "GeneralizedBorn", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecGB_VdwNone_GeomP1P1_F_sse2_double, "nb_kernel_ElecGB_VdwNone_GeomP1P1_F_sse2_double", "sse2_double", "GeneralizedBorn", "None", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_sse2_double, "nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_sse2_double", "sse2_double", "GeneralizedBorn", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_sse2_double, "nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_sse2_double", "sse2_double", "GeneralizedBorn", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_sse2_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_sse2_double", "sse2_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_sse2_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_sse2_double", "sse2_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_sse2_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_sse2_double", "sse2_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_F_sse2_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_F_sse2_double", "sse2_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_VF_sse2_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_VF_sse2_double", "sse2_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_F_sse2_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_F_sse2_double", "sse2_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_VF_sse2_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_VF_sse2_double", "sse2_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_F_sse2_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_F_sse2_double", "sse2_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_sse2_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_sse2_double", "sse2_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_sse2_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_sse2_double", "sse2_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_VF_sse2_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_VF_sse2_double", "sse2_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_F_sse2_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_F_sse2_double", "sse2_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_VF_sse2_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_VF_sse2_double", "sse2_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_F_sse2_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_F_sse2_double", "sse2_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_VF_sse2_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_VF_sse2_double", "sse2_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_F_sse2_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_F_sse2_double", "sse2_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_VF_sse2_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_VF_sse2_double", "sse2_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_F_sse2_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_F_sse2_double", "sse2_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_VF_sse2_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_VF_sse2_double", "sse2_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_F_sse2_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_F_sse2_double", "sse2_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomP1P1_VF_sse2_double, "nb_kernel_ElecRFCut_VdwNone_GeomP1P1_VF_sse2_double", "sse2_double", "ReactionField", "ExactCutoff", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomP1P1_F_sse2_double, "nb_kernel_ElecRFCut_VdwNone_GeomP1P1_F_sse2_double", "sse2_double", "ReactionField", "ExactCutoff", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW3P1_VF_sse2_double, "nb_kernel_ElecRFCut_VdwNone_GeomW3P1_VF_sse2_double", "sse2_double", "ReactionField", "ExactCutoff", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW3P1_F_sse2_double, "nb_kernel_ElecRFCut_VdwNone_GeomW3P1_F_sse2_double", "sse2_double", "ReactionField", "ExactCutoff", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW3W3_VF_sse2_double, "nb_kernel_ElecRFCut_VdwNone_GeomW3W3_VF_sse2_double", "sse2_double", "ReactionField", "ExactCutoff", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW3W3_F_sse2_double, "nb_kernel_ElecRFCut_VdwNone_GeomW3W3_F_sse2_double", "sse2_double", "ReactionField", "ExactCutoff", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW4P1_VF_sse2_double, "nb_kernel_ElecRFCut_VdwNone_GeomW4P1_VF_sse2_double", "sse2_double", "ReactionField", "ExactCutoff", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW4P1_F_sse2_double, "nb_kernel_ElecRFCut_VdwNone_GeomW4P1_F_sse2_double", "sse2_double", "ReactionField", "ExactCutoff", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW4W4_VF_sse2_double, "nb_kernel_ElecRFCut_VdwNone_GeomW4W4_VF_sse2_double", "sse2_double", "ReactionField", "ExactCutoff", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW4W4_F_sse2_double, "nb_kernel_ElecRFCut_VdwNone_GeomW4W4_F_sse2_double", "sse2_double", "ReactionField", "ExactCutoff", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_VF_sse2_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_VF_sse2_double", "sse2_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_F_sse2_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_F_sse2_double", "sse2_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_VF_sse2_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_VF_sse2_double", "sse2_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_F_sse2_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_F_sse2_double", "sse2_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_VF_sse2_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_VF_sse2_double", "sse2_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_F_sse2_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_F_sse2_double", "sse2_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_VF_sse2_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_VF_sse2_double", "sse2_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_F_sse2_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_F_sse2_double", "sse2_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_VF_sse2_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_VF_sse2_double", "sse2_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_F_sse2_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_F_sse2_double", "sse2_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomP1P1_VF_sse2_double, "nb_kernel_ElecRF_VdwLJ_GeomP1P1_VF_sse2_double", "sse2_double", "ReactionField", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomP1P1_F_sse2_double, "nb_kernel_ElecRF_VdwLJ_GeomP1P1_F_sse2_double", "sse2_double", "ReactionField", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW3P1_VF_sse2_double, "nb_kernel_ElecRF_VdwLJ_GeomW3P1_VF_sse2_double", "sse2_double", "ReactionField", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW3P1_F_sse2_double, "nb_kernel_ElecRF_VdwLJ_GeomW3P1_F_sse2_double", "sse2_double", "ReactionField", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW3W3_VF_sse2_double, "nb_kernel_ElecRF_VdwLJ_GeomW3W3_VF_sse2_double", "sse2_double", "ReactionField", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW3W3_F_sse2_double, "nb_kernel_ElecRF_VdwLJ_GeomW3W3_F_sse2_double", "sse2_double", "ReactionField", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW4P1_VF_sse2_double, "nb_kernel_ElecRF_VdwLJ_GeomW4P1_VF_sse2_double", "sse2_double", "ReactionField", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW4P1_F_sse2_double, "nb_kernel_ElecRF_VdwLJ_GeomW4P1_F_sse2_double", "sse2_double", "ReactionField", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW4W4_VF_sse2_double, "nb_kernel_ElecRF_VdwLJ_GeomW4W4_VF_sse2_double", "sse2_double", "ReactionField", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW4W4_F_sse2_double, "nb_kernel_ElecRF_VdwLJ_GeomW4W4_F_sse2_double", "sse2_double", "ReactionField", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecRF_VdwNone_GeomP1P1_VF_sse2_double, "nb_kernel_ElecRF_VdwNone_GeomP1P1_VF_sse2_double", "sse2_double", "ReactionField", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwNone_GeomP1P1_F_sse2_double, "nb_kernel_ElecRF_VdwNone_GeomP1P1_F_sse2_double", "sse2_double", "ReactionField", "None", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW3P1_VF_sse2_double, "nb_kernel_ElecRF_VdwNone_GeomW3P1_VF_sse2_double", "sse2_double", "ReactionField", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW3P1_F_sse2_double, "nb_kernel_ElecRF_VdwNone_GeomW3P1_F_sse2_double", "sse2_double", "ReactionField", "None", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_sse2_double, "nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_sse2_double", "sse2_double", "ReactionField", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW3W3_F_sse2_double, "nb_kernel_ElecRF_VdwNone_GeomW3W3_F_sse2_double", "sse2_double", "ReactionField", "None", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW4P1_VF_sse2_double, "nb_kernel_ElecRF_VdwNone_GeomW4P1_VF_sse2_double", "sse2_double", "ReactionField", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW4P1_F_sse2_double, "nb_kernel_ElecRF_VdwNone_GeomW4P1_F_sse2_double", "sse2_double", "ReactionField", "None", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW4W4_VF_sse2_double, "nb_kernel_ElecRF_VdwNone_GeomW4W4_VF_sse2_double", "sse2_double", "ReactionField", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW4W4_F_sse2_double, "nb_kernel_ElecRF_VdwNone_GeomW4W4_F_sse2_double", "sse2_double", "ReactionField", "None", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomP1P1_VF_sse2_double, "nb_kernel_ElecRF_VdwCSTab_GeomP1P1_VF_sse2_double", "sse2_double", "ReactionField", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomP1P1_F_sse2_double, "nb_kernel_ElecRF_VdwCSTab_GeomP1P1_F_sse2_double", "sse2_double", "ReactionField", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW3P1_VF_sse2_double, "nb_kernel_ElecRF_VdwCSTab_GeomW3P1_VF_sse2_double", "sse2_double", "ReactionField", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW3P1_F_sse2_double, "nb_kernel_ElecRF_VdwCSTab_GeomW3P1_F_sse2_double", "sse2_double", "ReactionField", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW3W3_VF_sse2_double, "nb_kernel_ElecRF_VdwCSTab_GeomW3W3_VF_sse2_double", "sse2_double", "ReactionField", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW3W3_F_sse2_double, "nb_kernel_ElecRF_VdwCSTab_GeomW3W3_F_sse2_double", "sse2_double", "ReactionField", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_sse2_double, "nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_sse2_double", "sse2_double", "ReactionField", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_sse2_double, "nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_sse2_double", "sse2_double", "ReactionField", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_sse2_double, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_sse2_double", "sse2_double", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sse2_double, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sse2_double", "sse2_double", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" }
 +};
 +
 +int
++kernellist_sse2_double_size = sizeof(kernellist_sse2_double)/sizeof(kernellist_sse2_double[0]);
 +
 +#endif
index 2a5aa767d9ab2efe9f88148c5d355dd5e8501844,0000000000000000000000000000000000000000..ddaa369874fb1c0fc1c981d2c238668c12d63770
mode 100644,000000..100644
--- /dev/null
@@@ -1,465 -1,0 +1,465 @@@
-     kernellist_sse2_single[] =
 +/*
 + * Note: this file was generated by the Gromacs sse2_single kernel generator.
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + * Copyright (c) 2001-2012, The GROMACS Development Team
 + *
 + * Gromacs is a library for molecular simulation and trajectory analysis,
 + * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
 + * a full list of developers and information, check out http://www.gromacs.org
 + *
 + * This program is free software; you can redistribute it and/or modify it under
 + * the terms of the GNU Lesser General Public License as published by the Free
 + * Software Foundation; either version 2 of the License, or (at your option) any
 + * later version.
 + *
 + * To help fund GROMACS development, we humbly ask that you cite
 + * the papers people have written on it - you can find them on the website.
 + */
 +#ifndef nb_kernel_sse2_single_h
 +#define nb_kernel_sse2_single_h
 +
 +#include "../nb_kernel.h"
 +
 +nb_kernel_t nb_kernel_ElecNone_VdwLJ_GeomP1P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecNone_VdwLJ_GeomP1P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecNone_VdwLJSh_GeomP1P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecNone_VdwLJSh_GeomP1P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomP1P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomP1P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW3P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW3P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW3W3_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW4P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW4P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW4W4_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW4W4_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomP1P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomP1P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW3P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW3P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW3W3_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW3W3_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW4P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW4P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW4W4_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW4W4_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomP1P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomP1P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW3P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW3P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW3W3_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW3W3_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW4P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW4P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW4W4_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW4W4_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomP1P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomP1P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW3P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW3P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW3W3_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW3W3_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW4P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW4P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW4W4_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW4W4_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomP1P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomP1P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW3P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW3P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW3W3_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW3W3_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecGB_VdwNone_GeomP1P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomP1P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomP1P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW3P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW3P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW3W3_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW3W3_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW4P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW4P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW4W4_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW4W4_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomP1P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomP1P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW3P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW3P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW3W3_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW3W3_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW4P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW4P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW4W4_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW4W4_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomP1P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomP1P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW3P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW3P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW3W3_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW4P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW4P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW4W4_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW4W4_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomP1P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomP1P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW3P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW3P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW3W3_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW3W3_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_sse2_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_sse2_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sse2_single;
 +
 +
 +nb_kernel_info_t
-     kernellist_sse2_single_size = sizeof(kernellist_sse2_single)/sizeof(kernellist_sse2_single[0]);
++kernellist_sse2_single[] =
 +{
 +    { nb_kernel_ElecNone_VdwLJ_GeomP1P1_VF_sse2_single, "nb_kernel_ElecNone_VdwLJ_GeomP1P1_VF_sse2_single", "sse2_single", "None", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecNone_VdwLJ_GeomP1P1_F_sse2_single, "nb_kernel_ElecNone_VdwLJ_GeomP1P1_F_sse2_single", "sse2_single", "None", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecNone_VdwLJSh_GeomP1P1_VF_sse2_single, "nb_kernel_ElecNone_VdwLJSh_GeomP1P1_VF_sse2_single", "sse2_single", "None", "None", "LennardJones", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecNone_VdwLJSh_GeomP1P1_F_sse2_single, "nb_kernel_ElecNone_VdwLJSh_GeomP1P1_F_sse2_single", "sse2_single", "None", "None", "LennardJones", "PotentialShift", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_sse2_single, "nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_sse2_single", "sse2_single", "None", "None", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_sse2_single, "nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_sse2_single", "sse2_single", "None", "None", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_sse2_single, "nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_sse2_single", "sse2_single", "None", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_sse2_single, "nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_sse2_single", "sse2_single", "None", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomP1P1_VF_sse2_single, "nb_kernel_ElecEw_VdwLJ_GeomP1P1_VF_sse2_single", "sse2_single", "Ewald", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomP1P1_F_sse2_single, "nb_kernel_ElecEw_VdwLJ_GeomP1P1_F_sse2_single", "sse2_single", "Ewald", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW3P1_VF_sse2_single, "nb_kernel_ElecEw_VdwLJ_GeomW3P1_VF_sse2_single", "sse2_single", "Ewald", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW3P1_F_sse2_single, "nb_kernel_ElecEw_VdwLJ_GeomW3P1_F_sse2_single", "sse2_single", "Ewald", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_sse2_single, "nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_sse2_single", "sse2_single", "Ewald", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW3W3_F_sse2_single, "nb_kernel_ElecEw_VdwLJ_GeomW3W3_F_sse2_single", "sse2_single", "Ewald", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW4P1_VF_sse2_single, "nb_kernel_ElecEw_VdwLJ_GeomW4P1_VF_sse2_single", "sse2_single", "Ewald", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW4P1_F_sse2_single, "nb_kernel_ElecEw_VdwLJ_GeomW4P1_F_sse2_single", "sse2_single", "Ewald", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW4W4_VF_sse2_single, "nb_kernel_ElecEw_VdwLJ_GeomW4W4_VF_sse2_single", "sse2_single", "Ewald", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW4W4_F_sse2_single, "nb_kernel_ElecEw_VdwLJ_GeomW4W4_F_sse2_single", "sse2_single", "Ewald", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecEw_VdwNone_GeomP1P1_VF_sse2_single, "nb_kernel_ElecEw_VdwNone_GeomP1P1_VF_sse2_single", "sse2_single", "Ewald", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwNone_GeomP1P1_F_sse2_single, "nb_kernel_ElecEw_VdwNone_GeomP1P1_F_sse2_single", "sse2_single", "Ewald", "None", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW3P1_VF_sse2_single, "nb_kernel_ElecEw_VdwNone_GeomW3P1_VF_sse2_single", "sse2_single", "Ewald", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW3P1_F_sse2_single, "nb_kernel_ElecEw_VdwNone_GeomW3P1_F_sse2_single", "sse2_single", "Ewald", "None", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW3W3_VF_sse2_single, "nb_kernel_ElecEw_VdwNone_GeomW3W3_VF_sse2_single", "sse2_single", "Ewald", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW3W3_F_sse2_single, "nb_kernel_ElecEw_VdwNone_GeomW3W3_F_sse2_single", "sse2_single", "Ewald", "None", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW4P1_VF_sse2_single, "nb_kernel_ElecEw_VdwNone_GeomW4P1_VF_sse2_single", "sse2_single", "Ewald", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW4P1_F_sse2_single, "nb_kernel_ElecEw_VdwNone_GeomW4P1_F_sse2_single", "sse2_single", "Ewald", "None", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW4W4_VF_sse2_single, "nb_kernel_ElecEw_VdwNone_GeomW4W4_VF_sse2_single", "sse2_single", "Ewald", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW4W4_F_sse2_single, "nb_kernel_ElecEw_VdwNone_GeomW4W4_F_sse2_single", "sse2_single", "Ewald", "None", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomP1P1_VF_sse2_single, "nb_kernel_ElecEw_VdwCSTab_GeomP1P1_VF_sse2_single", "sse2_single", "Ewald", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomP1P1_F_sse2_single, "nb_kernel_ElecEw_VdwCSTab_GeomP1P1_F_sse2_single", "sse2_single", "Ewald", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW3P1_VF_sse2_single, "nb_kernel_ElecEw_VdwCSTab_GeomW3P1_VF_sse2_single", "sse2_single", "Ewald", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW3P1_F_sse2_single, "nb_kernel_ElecEw_VdwCSTab_GeomW3P1_F_sse2_single", "sse2_single", "Ewald", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW3W3_VF_sse2_single, "nb_kernel_ElecEw_VdwCSTab_GeomW3W3_VF_sse2_single", "sse2_single", "Ewald", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW3W3_F_sse2_single, "nb_kernel_ElecEw_VdwCSTab_GeomW3W3_F_sse2_single", "sse2_single", "Ewald", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW4P1_VF_sse2_single, "nb_kernel_ElecEw_VdwCSTab_GeomW4P1_VF_sse2_single", "sse2_single", "Ewald", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW4P1_F_sse2_single, "nb_kernel_ElecEw_VdwCSTab_GeomW4P1_F_sse2_single", "sse2_single", "Ewald", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW4W4_VF_sse2_single, "nb_kernel_ElecEw_VdwCSTab_GeomW4W4_VF_sse2_single", "sse2_single", "Ewald", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW4W4_F_sse2_single, "nb_kernel_ElecEw_VdwCSTab_GeomW4W4_F_sse2_single", "sse2_single", "Ewald", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_VF_sse2_single, "nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_VF_sse2_single", "sse2_single", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_F_sse2_single, "nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_F_sse2_single", "sse2_single", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_VF_sse2_single, "nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_VF_sse2_single", "sse2_single", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_F_sse2_single, "nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_F_sse2_single", "sse2_single", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_VF_sse2_single, "nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_VF_sse2_single", "sse2_single", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_F_sse2_single, "nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_F_sse2_single", "sse2_single", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_VF_sse2_single, "nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_VF_sse2_single", "sse2_single", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_F_sse2_single, "nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_F_sse2_single", "sse2_single", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_VF_sse2_single, "nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_VF_sse2_single", "sse2_single", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_F_sse2_single, "nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_F_sse2_single", "sse2_single", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomP1P1_VF_sse2_single, "nb_kernel_ElecEwSh_VdwNone_GeomP1P1_VF_sse2_single", "sse2_single", "Ewald", "PotentialShift", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomP1P1_F_sse2_single, "nb_kernel_ElecEwSh_VdwNone_GeomP1P1_F_sse2_single", "sse2_single", "Ewald", "PotentialShift", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW3P1_VF_sse2_single, "nb_kernel_ElecEwSh_VdwNone_GeomW3P1_VF_sse2_single", "sse2_single", "Ewald", "PotentialShift", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW3P1_F_sse2_single, "nb_kernel_ElecEwSh_VdwNone_GeomW3P1_F_sse2_single", "sse2_single", "Ewald", "PotentialShift", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW3W3_VF_sse2_single, "nb_kernel_ElecEwSh_VdwNone_GeomW3W3_VF_sse2_single", "sse2_single", "Ewald", "PotentialShift", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW3W3_F_sse2_single, "nb_kernel_ElecEwSh_VdwNone_GeomW3W3_F_sse2_single", "sse2_single", "Ewald", "PotentialShift", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW4P1_VF_sse2_single, "nb_kernel_ElecEwSh_VdwNone_GeomW4P1_VF_sse2_single", "sse2_single", "Ewald", "PotentialShift", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW4P1_F_sse2_single, "nb_kernel_ElecEwSh_VdwNone_GeomW4P1_F_sse2_single", "sse2_single", "Ewald", "PotentialShift", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW4W4_VF_sse2_single, "nb_kernel_ElecEwSh_VdwNone_GeomW4W4_VF_sse2_single", "sse2_single", "Ewald", "PotentialShift", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW4W4_F_sse2_single, "nb_kernel_ElecEwSh_VdwNone_GeomW4W4_F_sse2_single", "sse2_single", "Ewald", "PotentialShift", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_VF_sse2_single, "nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_VF_sse2_single", "sse2_single", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_F_sse2_single, "nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_F_sse2_single", "sse2_single", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_VF_sse2_single, "nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_VF_sse2_single", "sse2_single", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_F_sse2_single, "nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_F_sse2_single", "sse2_single", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_VF_sse2_single, "nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_VF_sse2_single", "sse2_single", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_F_sse2_single, "nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_F_sse2_single", "sse2_single", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_VF_sse2_single, "nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_VF_sse2_single", "sse2_single", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_F_sse2_single, "nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_F_sse2_single", "sse2_single", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_VF_sse2_single, "nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_VF_sse2_single", "sse2_single", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_F_sse2_single, "nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_F_sse2_single", "sse2_single", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomP1P1_VF_sse2_single, "nb_kernel_ElecEwSw_VdwNone_GeomP1P1_VF_sse2_single", "sse2_single", "Ewald", "PotentialSwitch", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomP1P1_F_sse2_single, "nb_kernel_ElecEwSw_VdwNone_GeomP1P1_F_sse2_single", "sse2_single", "Ewald", "PotentialSwitch", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW3P1_VF_sse2_single, "nb_kernel_ElecEwSw_VdwNone_GeomW3P1_VF_sse2_single", "sse2_single", "Ewald", "PotentialSwitch", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW3P1_F_sse2_single, "nb_kernel_ElecEwSw_VdwNone_GeomW3P1_F_sse2_single", "sse2_single", "Ewald", "PotentialSwitch", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW3W3_VF_sse2_single, "nb_kernel_ElecEwSw_VdwNone_GeomW3W3_VF_sse2_single", "sse2_single", "Ewald", "PotentialSwitch", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW3W3_F_sse2_single, "nb_kernel_ElecEwSw_VdwNone_GeomW3W3_F_sse2_single", "sse2_single", "Ewald", "PotentialSwitch", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_sse2_single, "nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_sse2_single", "sse2_single", "Ewald", "PotentialSwitch", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_sse2_single, "nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_sse2_single", "sse2_single", "Ewald", "PotentialSwitch", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_sse2_single, "nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_sse2_single", "sse2_single", "Ewald", "PotentialSwitch", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_sse2_single, "nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_sse2_single", "sse2_single", "Ewald", "PotentialSwitch", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sse2_single, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sse2_single", "sse2_single", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sse2_single, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sse2_single", "sse2_single", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sse2_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sse2_single", "sse2_single", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sse2_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sse2_single", "sse2_single", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sse2_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sse2_single", "sse2_single", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sse2_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sse2_single", "sse2_single", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sse2_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sse2_single", "sse2_single", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sse2_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sse2_single", "sse2_single", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sse2_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sse2_single", "sse2_single", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sse2_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sse2_single", "sse2_single", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sse2_single, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sse2_single", "sse2_single", "Coulomb", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sse2_single, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sse2_single", "sse2_single", "Coulomb", "None", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sse2_single, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sse2_single", "sse2_single", "Coulomb", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sse2_single, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sse2_single", "sse2_single", "Coulomb", "None", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sse2_single, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sse2_single", "sse2_single", "Coulomb", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sse2_single, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sse2_single", "sse2_single", "Coulomb", "None", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sse2_single, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sse2_single", "sse2_single", "Coulomb", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sse2_single, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sse2_single", "sse2_single", "Coulomb", "None", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sse2_single, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sse2_single", "sse2_single", "Coulomb", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sse2_single, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sse2_single", "sse2_single", "Coulomb", "None", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sse2_single, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sse2_single", "sse2_single", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sse2_single, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sse2_single", "sse2_single", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sse2_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sse2_single", "sse2_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sse2_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sse2_single", "sse2_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sse2_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sse2_single", "sse2_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sse2_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sse2_single", "sse2_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sse2_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sse2_single", "sse2_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sse2_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sse2_single", "sse2_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sse2_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sse2_single", "sse2_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sse2_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sse2_single", "sse2_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sse2_single, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sse2_single, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sse2_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sse2_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sse2_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sse2_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sse2_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sse2_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sse2_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sse2_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sse2_single, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sse2_single, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sse2_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sse2_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sse2_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sse2_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sse2_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sse2_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sse2_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sse2_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sse2_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sse2_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sse2_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sse2_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sse2_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sse2_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sse2_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sse2_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sse2_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sse2_single", "sse2_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sse2_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sse2_single", "sse2_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_sse2_single, "nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_sse2_single", "sse2_single", "GeneralizedBorn", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_sse2_single, "nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_sse2_single", "sse2_single", "GeneralizedBorn", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_sse2_single, "nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_sse2_single", "sse2_single", "GeneralizedBorn", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecGB_VdwNone_GeomP1P1_F_sse2_single, "nb_kernel_ElecGB_VdwNone_GeomP1P1_F_sse2_single", "sse2_single", "GeneralizedBorn", "None", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_sse2_single, "nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_sse2_single", "sse2_single", "GeneralizedBorn", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_sse2_single, "nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_sse2_single", "sse2_single", "GeneralizedBorn", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_sse2_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_sse2_single", "sse2_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_sse2_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_sse2_single", "sse2_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_sse2_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_sse2_single", "sse2_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_F_sse2_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_F_sse2_single", "sse2_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_VF_sse2_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_VF_sse2_single", "sse2_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_F_sse2_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_F_sse2_single", "sse2_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_VF_sse2_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_VF_sse2_single", "sse2_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_F_sse2_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_F_sse2_single", "sse2_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_sse2_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_sse2_single", "sse2_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_sse2_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_sse2_single", "sse2_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_VF_sse2_single, "nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_VF_sse2_single", "sse2_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_F_sse2_single, "nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_F_sse2_single", "sse2_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_VF_sse2_single, "nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_VF_sse2_single", "sse2_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_F_sse2_single, "nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_F_sse2_single", "sse2_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_VF_sse2_single, "nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_VF_sse2_single", "sse2_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_F_sse2_single, "nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_F_sse2_single", "sse2_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_VF_sse2_single, "nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_VF_sse2_single", "sse2_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_F_sse2_single, "nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_F_sse2_single", "sse2_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_VF_sse2_single, "nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_VF_sse2_single", "sse2_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_F_sse2_single, "nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_F_sse2_single", "sse2_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomP1P1_VF_sse2_single, "nb_kernel_ElecRFCut_VdwNone_GeomP1P1_VF_sse2_single", "sse2_single", "ReactionField", "ExactCutoff", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomP1P1_F_sse2_single, "nb_kernel_ElecRFCut_VdwNone_GeomP1P1_F_sse2_single", "sse2_single", "ReactionField", "ExactCutoff", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW3P1_VF_sse2_single, "nb_kernel_ElecRFCut_VdwNone_GeomW3P1_VF_sse2_single", "sse2_single", "ReactionField", "ExactCutoff", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW3P1_F_sse2_single, "nb_kernel_ElecRFCut_VdwNone_GeomW3P1_F_sse2_single", "sse2_single", "ReactionField", "ExactCutoff", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW3W3_VF_sse2_single, "nb_kernel_ElecRFCut_VdwNone_GeomW3W3_VF_sse2_single", "sse2_single", "ReactionField", "ExactCutoff", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW3W3_F_sse2_single, "nb_kernel_ElecRFCut_VdwNone_GeomW3W3_F_sse2_single", "sse2_single", "ReactionField", "ExactCutoff", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW4P1_VF_sse2_single, "nb_kernel_ElecRFCut_VdwNone_GeomW4P1_VF_sse2_single", "sse2_single", "ReactionField", "ExactCutoff", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW4P1_F_sse2_single, "nb_kernel_ElecRFCut_VdwNone_GeomW4P1_F_sse2_single", "sse2_single", "ReactionField", "ExactCutoff", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW4W4_VF_sse2_single, "nb_kernel_ElecRFCut_VdwNone_GeomW4W4_VF_sse2_single", "sse2_single", "ReactionField", "ExactCutoff", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW4W4_F_sse2_single, "nb_kernel_ElecRFCut_VdwNone_GeomW4W4_F_sse2_single", "sse2_single", "ReactionField", "ExactCutoff", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_VF_sse2_single, "nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_VF_sse2_single", "sse2_single", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_F_sse2_single, "nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_F_sse2_single", "sse2_single", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_VF_sse2_single, "nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_VF_sse2_single", "sse2_single", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_F_sse2_single, "nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_F_sse2_single", "sse2_single", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_VF_sse2_single, "nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_VF_sse2_single", "sse2_single", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_F_sse2_single, "nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_F_sse2_single", "sse2_single", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_VF_sse2_single, "nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_VF_sse2_single", "sse2_single", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_F_sse2_single, "nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_F_sse2_single", "sse2_single", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_VF_sse2_single, "nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_VF_sse2_single", "sse2_single", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_F_sse2_single, "nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_F_sse2_single", "sse2_single", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomP1P1_VF_sse2_single, "nb_kernel_ElecRF_VdwLJ_GeomP1P1_VF_sse2_single", "sse2_single", "ReactionField", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomP1P1_F_sse2_single, "nb_kernel_ElecRF_VdwLJ_GeomP1P1_F_sse2_single", "sse2_single", "ReactionField", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW3P1_VF_sse2_single, "nb_kernel_ElecRF_VdwLJ_GeomW3P1_VF_sse2_single", "sse2_single", "ReactionField", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW3P1_F_sse2_single, "nb_kernel_ElecRF_VdwLJ_GeomW3P1_F_sse2_single", "sse2_single", "ReactionField", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW3W3_VF_sse2_single, "nb_kernel_ElecRF_VdwLJ_GeomW3W3_VF_sse2_single", "sse2_single", "ReactionField", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW3W3_F_sse2_single, "nb_kernel_ElecRF_VdwLJ_GeomW3W3_F_sse2_single", "sse2_single", "ReactionField", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW4P1_VF_sse2_single, "nb_kernel_ElecRF_VdwLJ_GeomW4P1_VF_sse2_single", "sse2_single", "ReactionField", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW4P1_F_sse2_single, "nb_kernel_ElecRF_VdwLJ_GeomW4P1_F_sse2_single", "sse2_single", "ReactionField", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW4W4_VF_sse2_single, "nb_kernel_ElecRF_VdwLJ_GeomW4W4_VF_sse2_single", "sse2_single", "ReactionField", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW4W4_F_sse2_single, "nb_kernel_ElecRF_VdwLJ_GeomW4W4_F_sse2_single", "sse2_single", "ReactionField", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecRF_VdwNone_GeomP1P1_VF_sse2_single, "nb_kernel_ElecRF_VdwNone_GeomP1P1_VF_sse2_single", "sse2_single", "ReactionField", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwNone_GeomP1P1_F_sse2_single, "nb_kernel_ElecRF_VdwNone_GeomP1P1_F_sse2_single", "sse2_single", "ReactionField", "None", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW3P1_VF_sse2_single, "nb_kernel_ElecRF_VdwNone_GeomW3P1_VF_sse2_single", "sse2_single", "ReactionField", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW3P1_F_sse2_single, "nb_kernel_ElecRF_VdwNone_GeomW3P1_F_sse2_single", "sse2_single", "ReactionField", "None", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_sse2_single, "nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_sse2_single", "sse2_single", "ReactionField", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW3W3_F_sse2_single, "nb_kernel_ElecRF_VdwNone_GeomW3W3_F_sse2_single", "sse2_single", "ReactionField", "None", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW4P1_VF_sse2_single, "nb_kernel_ElecRF_VdwNone_GeomW4P1_VF_sse2_single", "sse2_single", "ReactionField", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW4P1_F_sse2_single, "nb_kernel_ElecRF_VdwNone_GeomW4P1_F_sse2_single", "sse2_single", "ReactionField", "None", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW4W4_VF_sse2_single, "nb_kernel_ElecRF_VdwNone_GeomW4W4_VF_sse2_single", "sse2_single", "ReactionField", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW4W4_F_sse2_single, "nb_kernel_ElecRF_VdwNone_GeomW4W4_F_sse2_single", "sse2_single", "ReactionField", "None", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomP1P1_VF_sse2_single, "nb_kernel_ElecRF_VdwCSTab_GeomP1P1_VF_sse2_single", "sse2_single", "ReactionField", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomP1P1_F_sse2_single, "nb_kernel_ElecRF_VdwCSTab_GeomP1P1_F_sse2_single", "sse2_single", "ReactionField", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW3P1_VF_sse2_single, "nb_kernel_ElecRF_VdwCSTab_GeomW3P1_VF_sse2_single", "sse2_single", "ReactionField", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW3P1_F_sse2_single, "nb_kernel_ElecRF_VdwCSTab_GeomW3P1_F_sse2_single", "sse2_single", "ReactionField", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW3W3_VF_sse2_single, "nb_kernel_ElecRF_VdwCSTab_GeomW3W3_VF_sse2_single", "sse2_single", "ReactionField", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW3W3_F_sse2_single, "nb_kernel_ElecRF_VdwCSTab_GeomW3W3_F_sse2_single", "sse2_single", "ReactionField", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_sse2_single, "nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_sse2_single", "sse2_single", "ReactionField", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_sse2_single, "nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_sse2_single", "sse2_single", "ReactionField", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_sse2_single, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_sse2_single", "sse2_single", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sse2_single, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sse2_single", "sse2_single", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" }
 +};
 +
 +int
++kernellist_sse2_single_size = sizeof(kernellist_sse2_single)/sizeof(kernellist_sse2_single[0]);
 +
 +#endif
index 8d02013eb83ff7d174bde75e95a442e79a4e1e33,0000000000000000000000000000000000000000..cf5b9086093c6c9df0d5aab1e526ee37d98caee1
mode 100644,000000..100644
--- /dev/null
@@@ -1,465 -1,0 +1,465 @@@
-     kernellist_sse4_1_double[] =
 +/*
 + * Note: this file was generated by the Gromacs sse4_1_double kernel generator.
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + * Copyright (c) 2001-2012, The GROMACS Development Team
 + *
 + * Gromacs is a library for molecular simulation and trajectory analysis,
 + * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
 + * a full list of developers and information, check out http://www.gromacs.org
 + *
 + * This program is free software; you can redistribute it and/or modify it under
 + * the terms of the GNU Lesser General Public License as published by the Free
 + * Software Foundation; either version 2 of the License, or (at your option) any
 + * later version.
 + *
 + * To help fund GROMACS development, we humbly ask that you cite
 + * the papers people have written on it - you can find them on the website.
 + */
 +#ifndef nb_kernel_sse4_1_double_h
 +#define nb_kernel_sse4_1_double_h
 +
 +#include "../nb_kernel.h"
 +
 +nb_kernel_t nb_kernel_ElecNone_VdwLJ_GeomP1P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecNone_VdwLJ_GeomP1P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecNone_VdwLJSh_GeomP1P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecNone_VdwLJSh_GeomP1P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomP1P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomP1P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW3P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW3P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW3W3_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW4P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW4P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW4W4_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW4W4_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomP1P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomP1P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW3P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW3P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW3W3_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW3W3_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW4P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW4P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW4W4_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW4W4_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomP1P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomP1P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW3P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW3P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW3W3_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW3W3_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW4P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW4P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW4W4_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW4W4_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomP1P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomP1P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW3P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW3P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW3W3_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW3W3_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW4P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW4P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW4W4_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW4W4_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomP1P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomP1P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW3P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW3P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW3W3_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW3W3_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecGB_VdwNone_GeomP1P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomP1P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomP1P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW3P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW3P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW3W3_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW3W3_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW4P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW4P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW4W4_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW4W4_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomP1P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomP1P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW3P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW3P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW3W3_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW3W3_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW4P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW4P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW4W4_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW4W4_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomP1P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomP1P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW3P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW3P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW3W3_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW4P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW4P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW4W4_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW4W4_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomP1P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomP1P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW3P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW3P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW3W3_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW3W3_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_sse4_1_double;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sse4_1_double;
 +
 +
 +nb_kernel_info_t
-     kernellist_sse4_1_double_size = sizeof(kernellist_sse4_1_double)/sizeof(kernellist_sse4_1_double[0]);
++kernellist_sse4_1_double[] =
 +{
 +    { nb_kernel_ElecNone_VdwLJ_GeomP1P1_VF_sse4_1_double, "nb_kernel_ElecNone_VdwLJ_GeomP1P1_VF_sse4_1_double", "sse4_1_double", "None", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecNone_VdwLJ_GeomP1P1_F_sse4_1_double, "nb_kernel_ElecNone_VdwLJ_GeomP1P1_F_sse4_1_double", "sse4_1_double", "None", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecNone_VdwLJSh_GeomP1P1_VF_sse4_1_double, "nb_kernel_ElecNone_VdwLJSh_GeomP1P1_VF_sse4_1_double", "sse4_1_double", "None", "None", "LennardJones", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecNone_VdwLJSh_GeomP1P1_F_sse4_1_double, "nb_kernel_ElecNone_VdwLJSh_GeomP1P1_F_sse4_1_double", "sse4_1_double", "None", "None", "LennardJones", "PotentialShift", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_sse4_1_double, "nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_sse4_1_double", "sse4_1_double", "None", "None", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_sse4_1_double, "nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_sse4_1_double", "sse4_1_double", "None", "None", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_sse4_1_double, "nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_sse4_1_double", "sse4_1_double", "None", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_sse4_1_double, "nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_sse4_1_double", "sse4_1_double", "None", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomP1P1_VF_sse4_1_double, "nb_kernel_ElecEw_VdwLJ_GeomP1P1_VF_sse4_1_double", "sse4_1_double", "Ewald", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomP1P1_F_sse4_1_double, "nb_kernel_ElecEw_VdwLJ_GeomP1P1_F_sse4_1_double", "sse4_1_double", "Ewald", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW3P1_VF_sse4_1_double, "nb_kernel_ElecEw_VdwLJ_GeomW3P1_VF_sse4_1_double", "sse4_1_double", "Ewald", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW3P1_F_sse4_1_double, "nb_kernel_ElecEw_VdwLJ_GeomW3P1_F_sse4_1_double", "sse4_1_double", "Ewald", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_sse4_1_double, "nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_sse4_1_double", "sse4_1_double", "Ewald", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW3W3_F_sse4_1_double, "nb_kernel_ElecEw_VdwLJ_GeomW3W3_F_sse4_1_double", "sse4_1_double", "Ewald", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW4P1_VF_sse4_1_double, "nb_kernel_ElecEw_VdwLJ_GeomW4P1_VF_sse4_1_double", "sse4_1_double", "Ewald", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW4P1_F_sse4_1_double, "nb_kernel_ElecEw_VdwLJ_GeomW4P1_F_sse4_1_double", "sse4_1_double", "Ewald", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW4W4_VF_sse4_1_double, "nb_kernel_ElecEw_VdwLJ_GeomW4W4_VF_sse4_1_double", "sse4_1_double", "Ewald", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW4W4_F_sse4_1_double, "nb_kernel_ElecEw_VdwLJ_GeomW4W4_F_sse4_1_double", "sse4_1_double", "Ewald", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecEw_VdwNone_GeomP1P1_VF_sse4_1_double, "nb_kernel_ElecEw_VdwNone_GeomP1P1_VF_sse4_1_double", "sse4_1_double", "Ewald", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwNone_GeomP1P1_F_sse4_1_double, "nb_kernel_ElecEw_VdwNone_GeomP1P1_F_sse4_1_double", "sse4_1_double", "Ewald", "None", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW3P1_VF_sse4_1_double, "nb_kernel_ElecEw_VdwNone_GeomW3P1_VF_sse4_1_double", "sse4_1_double", "Ewald", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW3P1_F_sse4_1_double, "nb_kernel_ElecEw_VdwNone_GeomW3P1_F_sse4_1_double", "sse4_1_double", "Ewald", "None", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW3W3_VF_sse4_1_double, "nb_kernel_ElecEw_VdwNone_GeomW3W3_VF_sse4_1_double", "sse4_1_double", "Ewald", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW3W3_F_sse4_1_double, "nb_kernel_ElecEw_VdwNone_GeomW3W3_F_sse4_1_double", "sse4_1_double", "Ewald", "None", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW4P1_VF_sse4_1_double, "nb_kernel_ElecEw_VdwNone_GeomW4P1_VF_sse4_1_double", "sse4_1_double", "Ewald", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW4P1_F_sse4_1_double, "nb_kernel_ElecEw_VdwNone_GeomW4P1_F_sse4_1_double", "sse4_1_double", "Ewald", "None", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW4W4_VF_sse4_1_double, "nb_kernel_ElecEw_VdwNone_GeomW4W4_VF_sse4_1_double", "sse4_1_double", "Ewald", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW4W4_F_sse4_1_double, "nb_kernel_ElecEw_VdwNone_GeomW4W4_F_sse4_1_double", "sse4_1_double", "Ewald", "None", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomP1P1_VF_sse4_1_double, "nb_kernel_ElecEw_VdwCSTab_GeomP1P1_VF_sse4_1_double", "sse4_1_double", "Ewald", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomP1P1_F_sse4_1_double, "nb_kernel_ElecEw_VdwCSTab_GeomP1P1_F_sse4_1_double", "sse4_1_double", "Ewald", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW3P1_VF_sse4_1_double, "nb_kernel_ElecEw_VdwCSTab_GeomW3P1_VF_sse4_1_double", "sse4_1_double", "Ewald", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW3P1_F_sse4_1_double, "nb_kernel_ElecEw_VdwCSTab_GeomW3P1_F_sse4_1_double", "sse4_1_double", "Ewald", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW3W3_VF_sse4_1_double, "nb_kernel_ElecEw_VdwCSTab_GeomW3W3_VF_sse4_1_double", "sse4_1_double", "Ewald", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW3W3_F_sse4_1_double, "nb_kernel_ElecEw_VdwCSTab_GeomW3W3_F_sse4_1_double", "sse4_1_double", "Ewald", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW4P1_VF_sse4_1_double, "nb_kernel_ElecEw_VdwCSTab_GeomW4P1_VF_sse4_1_double", "sse4_1_double", "Ewald", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW4P1_F_sse4_1_double, "nb_kernel_ElecEw_VdwCSTab_GeomW4P1_F_sse4_1_double", "sse4_1_double", "Ewald", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW4W4_VF_sse4_1_double, "nb_kernel_ElecEw_VdwCSTab_GeomW4W4_VF_sse4_1_double", "sse4_1_double", "Ewald", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW4W4_F_sse4_1_double, "nb_kernel_ElecEw_VdwCSTab_GeomW4W4_F_sse4_1_double", "sse4_1_double", "Ewald", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_VF_sse4_1_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_VF_sse4_1_double", "sse4_1_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_F_sse4_1_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_F_sse4_1_double", "sse4_1_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_VF_sse4_1_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_VF_sse4_1_double", "sse4_1_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_F_sse4_1_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_F_sse4_1_double", "sse4_1_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_VF_sse4_1_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_VF_sse4_1_double", "sse4_1_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_F_sse4_1_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_F_sse4_1_double", "sse4_1_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_VF_sse4_1_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_VF_sse4_1_double", "sse4_1_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_F_sse4_1_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_F_sse4_1_double", "sse4_1_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_VF_sse4_1_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_VF_sse4_1_double", "sse4_1_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_F_sse4_1_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_F_sse4_1_double", "sse4_1_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomP1P1_VF_sse4_1_double, "nb_kernel_ElecEwSh_VdwNone_GeomP1P1_VF_sse4_1_double", "sse4_1_double", "Ewald", "PotentialShift", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomP1P1_F_sse4_1_double, "nb_kernel_ElecEwSh_VdwNone_GeomP1P1_F_sse4_1_double", "sse4_1_double", "Ewald", "PotentialShift", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW3P1_VF_sse4_1_double, "nb_kernel_ElecEwSh_VdwNone_GeomW3P1_VF_sse4_1_double", "sse4_1_double", "Ewald", "PotentialShift", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW3P1_F_sse4_1_double, "nb_kernel_ElecEwSh_VdwNone_GeomW3P1_F_sse4_1_double", "sse4_1_double", "Ewald", "PotentialShift", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW3W3_VF_sse4_1_double, "nb_kernel_ElecEwSh_VdwNone_GeomW3W3_VF_sse4_1_double", "sse4_1_double", "Ewald", "PotentialShift", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW3W3_F_sse4_1_double, "nb_kernel_ElecEwSh_VdwNone_GeomW3W3_F_sse4_1_double", "sse4_1_double", "Ewald", "PotentialShift", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW4P1_VF_sse4_1_double, "nb_kernel_ElecEwSh_VdwNone_GeomW4P1_VF_sse4_1_double", "sse4_1_double", "Ewald", "PotentialShift", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW4P1_F_sse4_1_double, "nb_kernel_ElecEwSh_VdwNone_GeomW4P1_F_sse4_1_double", "sse4_1_double", "Ewald", "PotentialShift", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW4W4_VF_sse4_1_double, "nb_kernel_ElecEwSh_VdwNone_GeomW4W4_VF_sse4_1_double", "sse4_1_double", "Ewald", "PotentialShift", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW4W4_F_sse4_1_double, "nb_kernel_ElecEwSh_VdwNone_GeomW4W4_F_sse4_1_double", "sse4_1_double", "Ewald", "PotentialShift", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_VF_sse4_1_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_VF_sse4_1_double", "sse4_1_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_F_sse4_1_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_F_sse4_1_double", "sse4_1_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_VF_sse4_1_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_VF_sse4_1_double", "sse4_1_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_F_sse4_1_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_F_sse4_1_double", "sse4_1_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_VF_sse4_1_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_VF_sse4_1_double", "sse4_1_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_F_sse4_1_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_F_sse4_1_double", "sse4_1_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_VF_sse4_1_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_VF_sse4_1_double", "sse4_1_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_F_sse4_1_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_F_sse4_1_double", "sse4_1_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_VF_sse4_1_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_VF_sse4_1_double", "sse4_1_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_F_sse4_1_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_F_sse4_1_double", "sse4_1_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomP1P1_VF_sse4_1_double, "nb_kernel_ElecEwSw_VdwNone_GeomP1P1_VF_sse4_1_double", "sse4_1_double", "Ewald", "PotentialSwitch", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomP1P1_F_sse4_1_double, "nb_kernel_ElecEwSw_VdwNone_GeomP1P1_F_sse4_1_double", "sse4_1_double", "Ewald", "PotentialSwitch", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW3P1_VF_sse4_1_double, "nb_kernel_ElecEwSw_VdwNone_GeomW3P1_VF_sse4_1_double", "sse4_1_double", "Ewald", "PotentialSwitch", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW3P1_F_sse4_1_double, "nb_kernel_ElecEwSw_VdwNone_GeomW3P1_F_sse4_1_double", "sse4_1_double", "Ewald", "PotentialSwitch", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW3W3_VF_sse4_1_double, "nb_kernel_ElecEwSw_VdwNone_GeomW3W3_VF_sse4_1_double", "sse4_1_double", "Ewald", "PotentialSwitch", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW3W3_F_sse4_1_double, "nb_kernel_ElecEwSw_VdwNone_GeomW3W3_F_sse4_1_double", "sse4_1_double", "Ewald", "PotentialSwitch", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_sse4_1_double, "nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_sse4_1_double", "sse4_1_double", "Ewald", "PotentialSwitch", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_sse4_1_double, "nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_sse4_1_double", "sse4_1_double", "Ewald", "PotentialSwitch", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_sse4_1_double, "nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_sse4_1_double", "sse4_1_double", "Ewald", "PotentialSwitch", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_sse4_1_double, "nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_sse4_1_double", "sse4_1_double", "Ewald", "PotentialSwitch", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sse4_1_double, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sse4_1_double", "sse4_1_double", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sse4_1_double, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sse4_1_double", "sse4_1_double", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sse4_1_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sse4_1_double", "sse4_1_double", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sse4_1_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sse4_1_double", "sse4_1_double", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sse4_1_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sse4_1_double", "sse4_1_double", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sse4_1_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sse4_1_double", "sse4_1_double", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sse4_1_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sse4_1_double", "sse4_1_double", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sse4_1_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sse4_1_double", "sse4_1_double", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sse4_1_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sse4_1_double", "sse4_1_double", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sse4_1_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sse4_1_double", "sse4_1_double", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sse4_1_double, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sse4_1_double", "sse4_1_double", "Coulomb", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sse4_1_double, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sse4_1_double", "sse4_1_double", "Coulomb", "None", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sse4_1_double, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sse4_1_double", "sse4_1_double", "Coulomb", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sse4_1_double, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sse4_1_double", "sse4_1_double", "Coulomb", "None", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sse4_1_double, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sse4_1_double", "sse4_1_double", "Coulomb", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sse4_1_double, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sse4_1_double", "sse4_1_double", "Coulomb", "None", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sse4_1_double, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sse4_1_double", "sse4_1_double", "Coulomb", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sse4_1_double, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sse4_1_double", "sse4_1_double", "Coulomb", "None", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sse4_1_double, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sse4_1_double", "sse4_1_double", "Coulomb", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sse4_1_double, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sse4_1_double", "sse4_1_double", "Coulomb", "None", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sse4_1_double, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sse4_1_double", "sse4_1_double", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sse4_1_double, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sse4_1_double", "sse4_1_double", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sse4_1_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sse4_1_double", "sse4_1_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sse4_1_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sse4_1_double", "sse4_1_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sse4_1_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sse4_1_double", "sse4_1_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sse4_1_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sse4_1_double", "sse4_1_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sse4_1_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sse4_1_double", "sse4_1_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sse4_1_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sse4_1_double", "sse4_1_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sse4_1_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sse4_1_double", "sse4_1_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sse4_1_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sse4_1_double", "sse4_1_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sse4_1_double, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sse4_1_double, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sse4_1_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sse4_1_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sse4_1_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sse4_1_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sse4_1_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sse4_1_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sse4_1_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sse4_1_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sse4_1_double, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sse4_1_double, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sse4_1_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sse4_1_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sse4_1_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sse4_1_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sse4_1_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sse4_1_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sse4_1_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sse4_1_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sse4_1_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sse4_1_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sse4_1_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sse4_1_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sse4_1_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sse4_1_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sse4_1_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sse4_1_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sse4_1_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sse4_1_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sse4_1_double", "sse4_1_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_sse4_1_double, "nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_sse4_1_double", "sse4_1_double", "GeneralizedBorn", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_sse4_1_double, "nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_sse4_1_double", "sse4_1_double", "GeneralizedBorn", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_sse4_1_double, "nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_sse4_1_double", "sse4_1_double", "GeneralizedBorn", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecGB_VdwNone_GeomP1P1_F_sse4_1_double, "nb_kernel_ElecGB_VdwNone_GeomP1P1_F_sse4_1_double", "sse4_1_double", "GeneralizedBorn", "None", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_sse4_1_double, "nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_sse4_1_double", "sse4_1_double", "GeneralizedBorn", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_sse4_1_double, "nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_sse4_1_double", "sse4_1_double", "GeneralizedBorn", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_sse4_1_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_sse4_1_double", "sse4_1_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_sse4_1_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_sse4_1_double", "sse4_1_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_sse4_1_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_sse4_1_double", "sse4_1_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_F_sse4_1_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_F_sse4_1_double", "sse4_1_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_VF_sse4_1_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_VF_sse4_1_double", "sse4_1_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_F_sse4_1_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_F_sse4_1_double", "sse4_1_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_VF_sse4_1_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_VF_sse4_1_double", "sse4_1_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_F_sse4_1_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_F_sse4_1_double", "sse4_1_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_sse4_1_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_sse4_1_double", "sse4_1_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_sse4_1_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_sse4_1_double", "sse4_1_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_VF_sse4_1_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_VF_sse4_1_double", "sse4_1_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_F_sse4_1_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_F_sse4_1_double", "sse4_1_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_VF_sse4_1_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_VF_sse4_1_double", "sse4_1_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_F_sse4_1_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_F_sse4_1_double", "sse4_1_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_VF_sse4_1_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_VF_sse4_1_double", "sse4_1_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_F_sse4_1_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_F_sse4_1_double", "sse4_1_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_VF_sse4_1_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_VF_sse4_1_double", "sse4_1_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_F_sse4_1_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_F_sse4_1_double", "sse4_1_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_VF_sse4_1_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_VF_sse4_1_double", "sse4_1_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_F_sse4_1_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_F_sse4_1_double", "sse4_1_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomP1P1_VF_sse4_1_double, "nb_kernel_ElecRFCut_VdwNone_GeomP1P1_VF_sse4_1_double", "sse4_1_double", "ReactionField", "ExactCutoff", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomP1P1_F_sse4_1_double, "nb_kernel_ElecRFCut_VdwNone_GeomP1P1_F_sse4_1_double", "sse4_1_double", "ReactionField", "ExactCutoff", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW3P1_VF_sse4_1_double, "nb_kernel_ElecRFCut_VdwNone_GeomW3P1_VF_sse4_1_double", "sse4_1_double", "ReactionField", "ExactCutoff", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW3P1_F_sse4_1_double, "nb_kernel_ElecRFCut_VdwNone_GeomW3P1_F_sse4_1_double", "sse4_1_double", "ReactionField", "ExactCutoff", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW3W3_VF_sse4_1_double, "nb_kernel_ElecRFCut_VdwNone_GeomW3W3_VF_sse4_1_double", "sse4_1_double", "ReactionField", "ExactCutoff", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW3W3_F_sse4_1_double, "nb_kernel_ElecRFCut_VdwNone_GeomW3W3_F_sse4_1_double", "sse4_1_double", "ReactionField", "ExactCutoff", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW4P1_VF_sse4_1_double, "nb_kernel_ElecRFCut_VdwNone_GeomW4P1_VF_sse4_1_double", "sse4_1_double", "ReactionField", "ExactCutoff", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW4P1_F_sse4_1_double, "nb_kernel_ElecRFCut_VdwNone_GeomW4P1_F_sse4_1_double", "sse4_1_double", "ReactionField", "ExactCutoff", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW4W4_VF_sse4_1_double, "nb_kernel_ElecRFCut_VdwNone_GeomW4W4_VF_sse4_1_double", "sse4_1_double", "ReactionField", "ExactCutoff", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW4W4_F_sse4_1_double, "nb_kernel_ElecRFCut_VdwNone_GeomW4W4_F_sse4_1_double", "sse4_1_double", "ReactionField", "ExactCutoff", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_VF_sse4_1_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_VF_sse4_1_double", "sse4_1_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_F_sse4_1_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_F_sse4_1_double", "sse4_1_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_VF_sse4_1_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_VF_sse4_1_double", "sse4_1_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_F_sse4_1_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_F_sse4_1_double", "sse4_1_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_VF_sse4_1_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_VF_sse4_1_double", "sse4_1_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_F_sse4_1_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_F_sse4_1_double", "sse4_1_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_VF_sse4_1_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_VF_sse4_1_double", "sse4_1_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_F_sse4_1_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_F_sse4_1_double", "sse4_1_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_VF_sse4_1_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_VF_sse4_1_double", "sse4_1_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_F_sse4_1_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_F_sse4_1_double", "sse4_1_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomP1P1_VF_sse4_1_double, "nb_kernel_ElecRF_VdwLJ_GeomP1P1_VF_sse4_1_double", "sse4_1_double", "ReactionField", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomP1P1_F_sse4_1_double, "nb_kernel_ElecRF_VdwLJ_GeomP1P1_F_sse4_1_double", "sse4_1_double", "ReactionField", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW3P1_VF_sse4_1_double, "nb_kernel_ElecRF_VdwLJ_GeomW3P1_VF_sse4_1_double", "sse4_1_double", "ReactionField", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW3P1_F_sse4_1_double, "nb_kernel_ElecRF_VdwLJ_GeomW3P1_F_sse4_1_double", "sse4_1_double", "ReactionField", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW3W3_VF_sse4_1_double, "nb_kernel_ElecRF_VdwLJ_GeomW3W3_VF_sse4_1_double", "sse4_1_double", "ReactionField", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW3W3_F_sse4_1_double, "nb_kernel_ElecRF_VdwLJ_GeomW3W3_F_sse4_1_double", "sse4_1_double", "ReactionField", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW4P1_VF_sse4_1_double, "nb_kernel_ElecRF_VdwLJ_GeomW4P1_VF_sse4_1_double", "sse4_1_double", "ReactionField", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW4P1_F_sse4_1_double, "nb_kernel_ElecRF_VdwLJ_GeomW4P1_F_sse4_1_double", "sse4_1_double", "ReactionField", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW4W4_VF_sse4_1_double, "nb_kernel_ElecRF_VdwLJ_GeomW4W4_VF_sse4_1_double", "sse4_1_double", "ReactionField", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW4W4_F_sse4_1_double, "nb_kernel_ElecRF_VdwLJ_GeomW4W4_F_sse4_1_double", "sse4_1_double", "ReactionField", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecRF_VdwNone_GeomP1P1_VF_sse4_1_double, "nb_kernel_ElecRF_VdwNone_GeomP1P1_VF_sse4_1_double", "sse4_1_double", "ReactionField", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwNone_GeomP1P1_F_sse4_1_double, "nb_kernel_ElecRF_VdwNone_GeomP1P1_F_sse4_1_double", "sse4_1_double", "ReactionField", "None", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW3P1_VF_sse4_1_double, "nb_kernel_ElecRF_VdwNone_GeomW3P1_VF_sse4_1_double", "sse4_1_double", "ReactionField", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW3P1_F_sse4_1_double, "nb_kernel_ElecRF_VdwNone_GeomW3P1_F_sse4_1_double", "sse4_1_double", "ReactionField", "None", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_sse4_1_double, "nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_sse4_1_double", "sse4_1_double", "ReactionField", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW3W3_F_sse4_1_double, "nb_kernel_ElecRF_VdwNone_GeomW3W3_F_sse4_1_double", "sse4_1_double", "ReactionField", "None", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW4P1_VF_sse4_1_double, "nb_kernel_ElecRF_VdwNone_GeomW4P1_VF_sse4_1_double", "sse4_1_double", "ReactionField", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW4P1_F_sse4_1_double, "nb_kernel_ElecRF_VdwNone_GeomW4P1_F_sse4_1_double", "sse4_1_double", "ReactionField", "None", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW4W4_VF_sse4_1_double, "nb_kernel_ElecRF_VdwNone_GeomW4W4_VF_sse4_1_double", "sse4_1_double", "ReactionField", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW4W4_F_sse4_1_double, "nb_kernel_ElecRF_VdwNone_GeomW4W4_F_sse4_1_double", "sse4_1_double", "ReactionField", "None", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomP1P1_VF_sse4_1_double, "nb_kernel_ElecRF_VdwCSTab_GeomP1P1_VF_sse4_1_double", "sse4_1_double", "ReactionField", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomP1P1_F_sse4_1_double, "nb_kernel_ElecRF_VdwCSTab_GeomP1P1_F_sse4_1_double", "sse4_1_double", "ReactionField", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW3P1_VF_sse4_1_double, "nb_kernel_ElecRF_VdwCSTab_GeomW3P1_VF_sse4_1_double", "sse4_1_double", "ReactionField", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW3P1_F_sse4_1_double, "nb_kernel_ElecRF_VdwCSTab_GeomW3P1_F_sse4_1_double", "sse4_1_double", "ReactionField", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW3W3_VF_sse4_1_double, "nb_kernel_ElecRF_VdwCSTab_GeomW3W3_VF_sse4_1_double", "sse4_1_double", "ReactionField", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW3W3_F_sse4_1_double, "nb_kernel_ElecRF_VdwCSTab_GeomW3W3_F_sse4_1_double", "sse4_1_double", "ReactionField", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_sse4_1_double, "nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_sse4_1_double", "sse4_1_double", "ReactionField", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_sse4_1_double, "nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_sse4_1_double", "sse4_1_double", "ReactionField", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_sse4_1_double, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_sse4_1_double", "sse4_1_double", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sse4_1_double, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sse4_1_double", "sse4_1_double", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" }
 +};
 +
 +int
++kernellist_sse4_1_double_size = sizeof(kernellist_sse4_1_double)/sizeof(kernellist_sse4_1_double[0]);
 +
 +#endif
index 6c15bd3f8c837d50cf43f6b7350ce3624043e0c4,0000000000000000000000000000000000000000..6deaef13a177da3ea2d6a94972ea46880fd097da
mode 100644,000000..100644
--- /dev/null
@@@ -1,465 -1,0 +1,465 @@@
-     kernellist_sse4_1_single[] =
 +/*
 + * Note: this file was generated by the Gromacs sse4_1_single kernel generator.
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + * Copyright (c) 2001-2012, The GROMACS Development Team
 + *
 + * Gromacs is a library for molecular simulation and trajectory analysis,
 + * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
 + * a full list of developers and information, check out http://www.gromacs.org
 + *
 + * This program is free software; you can redistribute it and/or modify it under
 + * the terms of the GNU Lesser General Public License as published by the Free
 + * Software Foundation; either version 2 of the License, or (at your option) any
 + * later version.
 + *
 + * To help fund GROMACS development, we humbly ask that you cite
 + * the papers people have written on it - you can find them on the website.
 + */
 +#ifndef nb_kernel_sse4_1_single_h
 +#define nb_kernel_sse4_1_single_h
 +
 +#include "../nb_kernel.h"
 +
 +nb_kernel_t nb_kernel_ElecNone_VdwLJ_GeomP1P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecNone_VdwLJ_GeomP1P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecNone_VdwLJSh_GeomP1P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecNone_VdwLJSh_GeomP1P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomP1P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomP1P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW3P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW3P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW3W3_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW4P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW4P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW4W4_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW4W4_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomP1P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomP1P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW3P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW3P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW3W3_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW3W3_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW4P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW4P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW4W4_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW4W4_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomP1P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomP1P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW3P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW3P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW3W3_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW3W3_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW4P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW4P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW4W4_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW4W4_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomP1P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomP1P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW3P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW3P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW3W3_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW3W3_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW4P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW4P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW4W4_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW4W4_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomP1P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomP1P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW3P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW3P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW3W3_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW3W3_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecGB_VdwNone_GeomP1P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomP1P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomP1P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW3P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW3P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW3W3_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW3W3_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW4P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW4P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW4W4_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW4W4_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomP1P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomP1P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW3P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW3P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW3W3_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW3W3_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW4P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW4P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW4W4_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW4W4_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomP1P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomP1P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW3P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW3P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW3W3_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW4P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW4P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW4W4_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW4W4_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomP1P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomP1P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW3P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW3P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW3W3_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW3W3_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_sse4_1_single;
 +nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sse4_1_single;
 +
 +
 +nb_kernel_info_t
-     kernellist_sse4_1_single_size = sizeof(kernellist_sse4_1_single)/sizeof(kernellist_sse4_1_single[0]);
++kernellist_sse4_1_single[] =
 +{
 +    { nb_kernel_ElecNone_VdwLJ_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecNone_VdwLJ_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "None", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecNone_VdwLJ_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecNone_VdwLJ_GeomP1P1_F_sse4_1_single", "sse4_1_single", "None", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecNone_VdwLJSh_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecNone_VdwLJSh_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "None", "None", "LennardJones", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecNone_VdwLJSh_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecNone_VdwLJSh_GeomP1P1_F_sse4_1_single", "sse4_1_single", "None", "None", "LennardJones", "PotentialShift", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "None", "None", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_sse4_1_single", "sse4_1_single", "None", "None", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "None", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_sse4_1_single", "sse4_1_single", "None", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecEw_VdwLJ_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "Ewald", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecEw_VdwLJ_GeomP1P1_F_sse4_1_single", "sse4_1_single", "Ewald", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW3P1_VF_sse4_1_single, "nb_kernel_ElecEw_VdwLJ_GeomW3P1_VF_sse4_1_single", "sse4_1_single", "Ewald", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW3P1_F_sse4_1_single, "nb_kernel_ElecEw_VdwLJ_GeomW3P1_F_sse4_1_single", "sse4_1_single", "Ewald", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_sse4_1_single, "nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_sse4_1_single", "sse4_1_single", "Ewald", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW3W3_F_sse4_1_single, "nb_kernel_ElecEw_VdwLJ_GeomW3W3_F_sse4_1_single", "sse4_1_single", "Ewald", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW4P1_VF_sse4_1_single, "nb_kernel_ElecEw_VdwLJ_GeomW4P1_VF_sse4_1_single", "sse4_1_single", "Ewald", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW4P1_F_sse4_1_single, "nb_kernel_ElecEw_VdwLJ_GeomW4P1_F_sse4_1_single", "sse4_1_single", "Ewald", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW4W4_VF_sse4_1_single, "nb_kernel_ElecEw_VdwLJ_GeomW4W4_VF_sse4_1_single", "sse4_1_single", "Ewald", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwLJ_GeomW4W4_F_sse4_1_single, "nb_kernel_ElecEw_VdwLJ_GeomW4W4_F_sse4_1_single", "sse4_1_single", "Ewald", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecEw_VdwNone_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecEw_VdwNone_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "Ewald", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwNone_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecEw_VdwNone_GeomP1P1_F_sse4_1_single", "sse4_1_single", "Ewald", "None", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW3P1_VF_sse4_1_single, "nb_kernel_ElecEw_VdwNone_GeomW3P1_VF_sse4_1_single", "sse4_1_single", "Ewald", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW3P1_F_sse4_1_single, "nb_kernel_ElecEw_VdwNone_GeomW3P1_F_sse4_1_single", "sse4_1_single", "Ewald", "None", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW3W3_VF_sse4_1_single, "nb_kernel_ElecEw_VdwNone_GeomW3W3_VF_sse4_1_single", "sse4_1_single", "Ewald", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW3W3_F_sse4_1_single, "nb_kernel_ElecEw_VdwNone_GeomW3W3_F_sse4_1_single", "sse4_1_single", "Ewald", "None", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW4P1_VF_sse4_1_single, "nb_kernel_ElecEw_VdwNone_GeomW4P1_VF_sse4_1_single", "sse4_1_single", "Ewald", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW4P1_F_sse4_1_single, "nb_kernel_ElecEw_VdwNone_GeomW4P1_F_sse4_1_single", "sse4_1_single", "Ewald", "None", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW4W4_VF_sse4_1_single, "nb_kernel_ElecEw_VdwNone_GeomW4W4_VF_sse4_1_single", "sse4_1_single", "Ewald", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwNone_GeomW4W4_F_sse4_1_single, "nb_kernel_ElecEw_VdwNone_GeomW4W4_F_sse4_1_single", "sse4_1_single", "Ewald", "None", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecEw_VdwCSTab_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "Ewald", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecEw_VdwCSTab_GeomP1P1_F_sse4_1_single", "sse4_1_single", "Ewald", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW3P1_VF_sse4_1_single, "nb_kernel_ElecEw_VdwCSTab_GeomW3P1_VF_sse4_1_single", "sse4_1_single", "Ewald", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW3P1_F_sse4_1_single, "nb_kernel_ElecEw_VdwCSTab_GeomW3P1_F_sse4_1_single", "sse4_1_single", "Ewald", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW3W3_VF_sse4_1_single, "nb_kernel_ElecEw_VdwCSTab_GeomW3W3_VF_sse4_1_single", "sse4_1_single", "Ewald", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW3W3_F_sse4_1_single, "nb_kernel_ElecEw_VdwCSTab_GeomW3W3_F_sse4_1_single", "sse4_1_single", "Ewald", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW4P1_VF_sse4_1_single, "nb_kernel_ElecEw_VdwCSTab_GeomW4P1_VF_sse4_1_single", "sse4_1_single", "Ewald", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW4P1_F_sse4_1_single, "nb_kernel_ElecEw_VdwCSTab_GeomW4P1_F_sse4_1_single", "sse4_1_single", "Ewald", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW4W4_VF_sse4_1_single, "nb_kernel_ElecEw_VdwCSTab_GeomW4W4_VF_sse4_1_single", "sse4_1_single", "Ewald", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEw_VdwCSTab_GeomW4W4_F_sse4_1_single, "nb_kernel_ElecEw_VdwCSTab_GeomW4W4_F_sse4_1_single", "sse4_1_single", "Ewald", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_F_sse4_1_single", "sse4_1_single", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_VF_sse4_1_single, "nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_VF_sse4_1_single", "sse4_1_single", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_F_sse4_1_single, "nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_F_sse4_1_single", "sse4_1_single", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_VF_sse4_1_single, "nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_VF_sse4_1_single", "sse4_1_single", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_F_sse4_1_single, "nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_F_sse4_1_single", "sse4_1_single", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_VF_sse4_1_single, "nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_VF_sse4_1_single", "sse4_1_single", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_F_sse4_1_single, "nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_F_sse4_1_single", "sse4_1_single", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_VF_sse4_1_single, "nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_VF_sse4_1_single", "sse4_1_single", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_F_sse4_1_single, "nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_F_sse4_1_single", "sse4_1_single", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecEwSh_VdwNone_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "Ewald", "PotentialShift", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecEwSh_VdwNone_GeomP1P1_F_sse4_1_single", "sse4_1_single", "Ewald", "PotentialShift", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW3P1_VF_sse4_1_single, "nb_kernel_ElecEwSh_VdwNone_GeomW3P1_VF_sse4_1_single", "sse4_1_single", "Ewald", "PotentialShift", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW3P1_F_sse4_1_single, "nb_kernel_ElecEwSh_VdwNone_GeomW3P1_F_sse4_1_single", "sse4_1_single", "Ewald", "PotentialShift", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW3W3_VF_sse4_1_single, "nb_kernel_ElecEwSh_VdwNone_GeomW3W3_VF_sse4_1_single", "sse4_1_single", "Ewald", "PotentialShift", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW3W3_F_sse4_1_single, "nb_kernel_ElecEwSh_VdwNone_GeomW3W3_F_sse4_1_single", "sse4_1_single", "Ewald", "PotentialShift", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW4P1_VF_sse4_1_single, "nb_kernel_ElecEwSh_VdwNone_GeomW4P1_VF_sse4_1_single", "sse4_1_single", "Ewald", "PotentialShift", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW4P1_F_sse4_1_single, "nb_kernel_ElecEwSh_VdwNone_GeomW4P1_F_sse4_1_single", "sse4_1_single", "Ewald", "PotentialShift", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW4W4_VF_sse4_1_single, "nb_kernel_ElecEwSh_VdwNone_GeomW4W4_VF_sse4_1_single", "sse4_1_single", "Ewald", "PotentialShift", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSh_VdwNone_GeomW4W4_F_sse4_1_single, "nb_kernel_ElecEwSh_VdwNone_GeomW4W4_F_sse4_1_single", "sse4_1_single", "Ewald", "PotentialShift", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_F_sse4_1_single", "sse4_1_single", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_VF_sse4_1_single, "nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_VF_sse4_1_single", "sse4_1_single", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_F_sse4_1_single, "nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_F_sse4_1_single", "sse4_1_single", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_VF_sse4_1_single, "nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_VF_sse4_1_single", "sse4_1_single", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_F_sse4_1_single, "nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_F_sse4_1_single", "sse4_1_single", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_VF_sse4_1_single, "nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_VF_sse4_1_single", "sse4_1_single", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_F_sse4_1_single, "nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_F_sse4_1_single", "sse4_1_single", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_VF_sse4_1_single, "nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_VF_sse4_1_single", "sse4_1_single", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_F_sse4_1_single, "nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_F_sse4_1_single", "sse4_1_single", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecEwSw_VdwNone_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "Ewald", "PotentialSwitch", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecEwSw_VdwNone_GeomP1P1_F_sse4_1_single", "sse4_1_single", "Ewald", "PotentialSwitch", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW3P1_VF_sse4_1_single, "nb_kernel_ElecEwSw_VdwNone_GeomW3P1_VF_sse4_1_single", "sse4_1_single", "Ewald", "PotentialSwitch", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW3P1_F_sse4_1_single, "nb_kernel_ElecEwSw_VdwNone_GeomW3P1_F_sse4_1_single", "sse4_1_single", "Ewald", "PotentialSwitch", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW3W3_VF_sse4_1_single, "nb_kernel_ElecEwSw_VdwNone_GeomW3W3_VF_sse4_1_single", "sse4_1_single", "Ewald", "PotentialSwitch", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW3W3_F_sse4_1_single, "nb_kernel_ElecEwSw_VdwNone_GeomW3W3_F_sse4_1_single", "sse4_1_single", "Ewald", "PotentialSwitch", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_sse4_1_single, "nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_sse4_1_single", "sse4_1_single", "Ewald", "PotentialSwitch", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_sse4_1_single, "nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_sse4_1_single", "sse4_1_single", "Ewald", "PotentialSwitch", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_sse4_1_single, "nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_sse4_1_single", "sse4_1_single", "Ewald", "PotentialSwitch", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_sse4_1_single, "nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_sse4_1_single", "sse4_1_single", "Ewald", "PotentialSwitch", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sse4_1_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sse4_1_single, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sse4_1_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sse4_1_single, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sse4_1_single, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sse4_1_single, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sse4_1_single, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sse4_1_single, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sse4_1_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sse4_1_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sse4_1_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sse4_1_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sse4_1_single", "sse4_1_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sse4_1_single, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sse4_1_single", "sse4_1_single", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sse4_1_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sse4_1_single, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sse4_1_single", "sse4_1_single", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "GeneralizedBorn", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_sse4_1_single", "sse4_1_single", "GeneralizedBorn", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "GeneralizedBorn", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecGB_VdwNone_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecGB_VdwNone_GeomP1P1_F_sse4_1_single", "sse4_1_single", "GeneralizedBorn", "None", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "GeneralizedBorn", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_sse4_1_single", "sse4_1_single", "GeneralizedBorn", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_sse4_1_single", "sse4_1_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_sse4_1_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_sse4_1_single", "sse4_1_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_F_sse4_1_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_F_sse4_1_single", "sse4_1_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_VF_sse4_1_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_VF_sse4_1_single", "sse4_1_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_F_sse4_1_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_F_sse4_1_single", "sse4_1_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_VF_sse4_1_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_VF_sse4_1_single", "sse4_1_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_F_sse4_1_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_F_sse4_1_single", "sse4_1_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_sse4_1_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_sse4_1_single", "sse4_1_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_sse4_1_single, "nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_sse4_1_single", "sse4_1_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_F_sse4_1_single", "sse4_1_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_VF_sse4_1_single, "nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_VF_sse4_1_single", "sse4_1_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_F_sse4_1_single, "nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_F_sse4_1_single", "sse4_1_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_VF_sse4_1_single, "nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_VF_sse4_1_single", "sse4_1_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_F_sse4_1_single, "nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_F_sse4_1_single", "sse4_1_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_VF_sse4_1_single, "nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_VF_sse4_1_single", "sse4_1_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_F_sse4_1_single, "nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_F_sse4_1_single", "sse4_1_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_VF_sse4_1_single, "nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_VF_sse4_1_single", "sse4_1_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_F_sse4_1_single, "nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_F_sse4_1_single", "sse4_1_single", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecRFCut_VdwNone_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "ReactionField", "ExactCutoff", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecRFCut_VdwNone_GeomP1P1_F_sse4_1_single", "sse4_1_single", "ReactionField", "ExactCutoff", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW3P1_VF_sse4_1_single, "nb_kernel_ElecRFCut_VdwNone_GeomW3P1_VF_sse4_1_single", "sse4_1_single", "ReactionField", "ExactCutoff", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW3P1_F_sse4_1_single, "nb_kernel_ElecRFCut_VdwNone_GeomW3P1_F_sse4_1_single", "sse4_1_single", "ReactionField", "ExactCutoff", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW3W3_VF_sse4_1_single, "nb_kernel_ElecRFCut_VdwNone_GeomW3W3_VF_sse4_1_single", "sse4_1_single", "ReactionField", "ExactCutoff", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW3W3_F_sse4_1_single, "nb_kernel_ElecRFCut_VdwNone_GeomW3W3_F_sse4_1_single", "sse4_1_single", "ReactionField", "ExactCutoff", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW4P1_VF_sse4_1_single, "nb_kernel_ElecRFCut_VdwNone_GeomW4P1_VF_sse4_1_single", "sse4_1_single", "ReactionField", "ExactCutoff", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW4P1_F_sse4_1_single, "nb_kernel_ElecRFCut_VdwNone_GeomW4P1_F_sse4_1_single", "sse4_1_single", "ReactionField", "ExactCutoff", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW4W4_VF_sse4_1_single, "nb_kernel_ElecRFCut_VdwNone_GeomW4W4_VF_sse4_1_single", "sse4_1_single", "ReactionField", "ExactCutoff", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwNone_GeomW4W4_F_sse4_1_single, "nb_kernel_ElecRFCut_VdwNone_GeomW4W4_F_sse4_1_single", "sse4_1_single", "ReactionField", "ExactCutoff", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_F_sse4_1_single", "sse4_1_single", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_VF_sse4_1_single, "nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_VF_sse4_1_single", "sse4_1_single", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_F_sse4_1_single, "nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_F_sse4_1_single", "sse4_1_single", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_VF_sse4_1_single, "nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_VF_sse4_1_single", "sse4_1_single", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_F_sse4_1_single, "nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_F_sse4_1_single", "sse4_1_single", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_VF_sse4_1_single, "nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_VF_sse4_1_single", "sse4_1_single", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_F_sse4_1_single, "nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_F_sse4_1_single", "sse4_1_single", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_VF_sse4_1_single, "nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_VF_sse4_1_single", "sse4_1_single", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_F_sse4_1_single, "nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_F_sse4_1_single", "sse4_1_single", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecRF_VdwLJ_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "ReactionField", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecRF_VdwLJ_GeomP1P1_F_sse4_1_single", "sse4_1_single", "ReactionField", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW3P1_VF_sse4_1_single, "nb_kernel_ElecRF_VdwLJ_GeomW3P1_VF_sse4_1_single", "sse4_1_single", "ReactionField", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW3P1_F_sse4_1_single, "nb_kernel_ElecRF_VdwLJ_GeomW3P1_F_sse4_1_single", "sse4_1_single", "ReactionField", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW3W3_VF_sse4_1_single, "nb_kernel_ElecRF_VdwLJ_GeomW3W3_VF_sse4_1_single", "sse4_1_single", "ReactionField", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW3W3_F_sse4_1_single, "nb_kernel_ElecRF_VdwLJ_GeomW3W3_F_sse4_1_single", "sse4_1_single", "ReactionField", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW4P1_VF_sse4_1_single, "nb_kernel_ElecRF_VdwLJ_GeomW4P1_VF_sse4_1_single", "sse4_1_single", "ReactionField", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW4P1_F_sse4_1_single, "nb_kernel_ElecRF_VdwLJ_GeomW4P1_F_sse4_1_single", "sse4_1_single", "ReactionField", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW4W4_VF_sse4_1_single, "nb_kernel_ElecRF_VdwLJ_GeomW4W4_VF_sse4_1_single", "sse4_1_single", "ReactionField", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwLJ_GeomW4W4_F_sse4_1_single, "nb_kernel_ElecRF_VdwLJ_GeomW4W4_F_sse4_1_single", "sse4_1_single", "ReactionField", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecRF_VdwNone_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecRF_VdwNone_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "ReactionField", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwNone_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecRF_VdwNone_GeomP1P1_F_sse4_1_single", "sse4_1_single", "ReactionField", "None", "None", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW3P1_VF_sse4_1_single, "nb_kernel_ElecRF_VdwNone_GeomW3P1_VF_sse4_1_single", "sse4_1_single", "ReactionField", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW3P1_F_sse4_1_single, "nb_kernel_ElecRF_VdwNone_GeomW3P1_F_sse4_1_single", "sse4_1_single", "ReactionField", "None", "None", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_sse4_1_single, "nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_sse4_1_single", "sse4_1_single", "ReactionField", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW3W3_F_sse4_1_single, "nb_kernel_ElecRF_VdwNone_GeomW3W3_F_sse4_1_single", "sse4_1_single", "ReactionField", "None", "None", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW4P1_VF_sse4_1_single, "nb_kernel_ElecRF_VdwNone_GeomW4P1_VF_sse4_1_single", "sse4_1_single", "ReactionField", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW4P1_F_sse4_1_single, "nb_kernel_ElecRF_VdwNone_GeomW4P1_F_sse4_1_single", "sse4_1_single", "ReactionField", "None", "None", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW4W4_VF_sse4_1_single, "nb_kernel_ElecRF_VdwNone_GeomW4W4_VF_sse4_1_single", "sse4_1_single", "ReactionField", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwNone_GeomW4W4_F_sse4_1_single, "nb_kernel_ElecRF_VdwNone_GeomW4W4_F_sse4_1_single", "sse4_1_single", "ReactionField", "None", "None", "None", "Water4Water4", "", "Force" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomP1P1_VF_sse4_1_single, "nb_kernel_ElecRF_VdwCSTab_GeomP1P1_VF_sse4_1_single", "sse4_1_single", "ReactionField", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomP1P1_F_sse4_1_single, "nb_kernel_ElecRF_VdwCSTab_GeomP1P1_F_sse4_1_single", "sse4_1_single", "ReactionField", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW3P1_VF_sse4_1_single, "nb_kernel_ElecRF_VdwCSTab_GeomW3P1_VF_sse4_1_single", "sse4_1_single", "ReactionField", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW3P1_F_sse4_1_single, "nb_kernel_ElecRF_VdwCSTab_GeomW3P1_F_sse4_1_single", "sse4_1_single", "ReactionField", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW3W3_VF_sse4_1_single, "nb_kernel_ElecRF_VdwCSTab_GeomW3W3_VF_sse4_1_single", "sse4_1_single", "ReactionField", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW3W3_F_sse4_1_single, "nb_kernel_ElecRF_VdwCSTab_GeomW3W3_F_sse4_1_single", "sse4_1_single", "ReactionField", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_sse4_1_single, "nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_sse4_1_single", "sse4_1_single", "ReactionField", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_sse4_1_single, "nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_sse4_1_single", "sse4_1_single", "ReactionField", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_sse4_1_single, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_sse4_1_single", "sse4_1_single", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
 +    { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sse4_1_single, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sse4_1_single", "sse4_1_single", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" }
 +};
 +
 +int
++kernellist_sse4_1_single_size = sizeof(kernellist_sse4_1_single)/sizeof(kernellist_sse4_1_single[0]); /* Entry count of kernellist_sse4_1_single: total array bytes divided by the bytes of one element. */
 +
 +#endif
index 67535fcb101a0604e27d88d87f8b0bb5b3a672b8,0000000000000000000000000000000000000000..afe5f56351bd72b9186b25cd67aeeb125dcc3599
mode 100644,000000..100644
--- /dev/null
@@@ -1,675 -1,0 +1,675 @@@
-         nl->kernelptr_vf       = gmx_nb_generic_adress_kernel;
-         nl->kernelptr_f        = gmx_nb_generic_adress_kernel;
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + *          GROningen MAchine for Chemical Simulations
 + *
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + *
 + * For more info, check our website at http://www.gromacs.org
 + *
 + * And Hey:
 + * GROningen Mixture of Alchemy and Childrens' Stories
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#ifdef GMX_THREAD_MPI
 +#include <thread_mpi.h>
 +#endif
 +
 +#include <stdio.h>
 +#include <stdlib.h>
 +#include "typedefs.h"
 +#include "txtdump.h"
 +#include "smalloc.h"
 +#include "ns.h"
 +#include "vec.h"
 +#include "maths.h"
 +#include "macros.h"
 +#include "string2.h"
 +#include "force.h"
 +#include "names.h"
 +#include "main.h"
 +#include "xvgr.h"
 +#include "gmx_fatal.h"
 +#include "physics.h"
 +#include "force.h"
 +#include "bondf.h"
 +#include "nrnb.h"
 +#include "smalloc.h"
 +#include "nonbonded.h"
 +
 +#include "nb_kernel.h"
 +#include "nb_free_energy.h"
 +#include "nb_generic.h"
 +#include "nb_generic_cg.h"
 +#include "nb_generic_adress.h"
 +
 +/* Different default (c) and accelerated interaction-specific kernels */
 +#include "nb_kernel_c/nb_kernel_c.h"
 +
 +#if (defined GMX_CPU_ACCELERATION_X86_SSE2) && !(defined GMX_DOUBLE)
 +#    include "nb_kernel_sse2_single/nb_kernel_sse2_single.h"
 +#endif
 +#if (defined GMX_CPU_ACCELERATION_X86_SSE4_1) && !(defined GMX_DOUBLE)
 +#    include "nb_kernel_sse4_1_single/nb_kernel_sse4_1_single.h"
 +#endif
 +#if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA) && !(defined GMX_DOUBLE)
 +#    include "nb_kernel_avx_128_fma_single/nb_kernel_avx_128_fma_single.h"
 +#endif
 +#if (defined GMX_CPU_ACCELERATION_X86_AVX_256) && !(defined GMX_DOUBLE)
 +#    include "nb_kernel_avx_256_single/nb_kernel_avx_256_single.h"
 +#endif
 +#if (defined GMX_CPU_ACCELERATION_X86_SSE2 && defined GMX_DOUBLE)
 +#    include "nb_kernel_sse2_double/nb_kernel_sse2_double.h"
 +#endif
 +#if (defined GMX_CPU_ACCELERATION_X86_SSE4_1 && defined GMX_DOUBLE)
 +#    include "nb_kernel_sse4_1_double/nb_kernel_sse4_1_double.h"
 +#endif
 +#if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA && defined GMX_DOUBLE)
 +#    include "nb_kernel_avx_128_fma_double/nb_kernel_avx_128_fma_double.h"
 +#endif
 +#if (defined GMX_CPU_ACCELERATION_X86_AVX_256 && defined GMX_DOUBLE)
 +#    include "nb_kernel_avx_256_double/nb_kernel_avx_256_double.h"
 +#endif
 +
 +
 +#ifdef GMX_THREAD_MPI
 +static tMPI_Thread_mutex_t nonbonded_setup_mutex = TMPI_THREAD_MUTEX_INITIALIZER; /* held for the whole body of gmx_nonbonded_setup() */
 +#endif
 +static gmx_bool            nonbonded_setup_done  = FALSE; /* set to TRUE by gmx_nonbonded_setup() after the kernel hash has been built */
 +
 +
 +void /* One-time registration of the nonbonded kernel lists; later calls are no-ops once nonbonded_setup_done is TRUE */
 +gmx_nonbonded_setup(FILE *         fplog, /* not referenced in this function */
 +                    t_forcerec *   fr, /* may be NULL; only fr->use_cpu_acceleration is read */
 +                    gmx_bool       bGenericKernelOnly) /* TRUE: no kernel list is registered, only the hash is initialized */
 +{
 +#ifdef GMX_THREAD_MPI
 +    tMPI_Thread_mutex_lock(&nonbonded_setup_mutex);
 +#endif
 +    /* With GMX_THREAD_MPI the mutex above guarantees only one thread at a time gets here. */
 +    if (nonbonded_setup_done == FALSE)
 +    {
 +        if (bGenericKernelOnly == FALSE)
 +        {
 +            /* Add the plain-C kernel list (kernellist_c) to the structure stored statically in nb_kernel.c */
 +            nb_kernel_list_add_kernels(kernellist_c, kernellist_c_size);
 +
 +            if (!(fr != NULL && fr->use_cpu_acceleration == FALSE)) /* skipped only when fr is given and has acceleration turned off */
 +            {
 +                /* Add interaction-specific kernels for different architectures */
 +                /* Single precision */
 +#if (defined GMX_CPU_ACCELERATION_X86_SSE2) && !(defined GMX_DOUBLE)
 +                nb_kernel_list_add_kernels(kernellist_sse2_single, kernellist_sse2_single_size);
 +#endif
 +#if (defined GMX_CPU_ACCELERATION_X86_SSE4_1) && !(defined GMX_DOUBLE)
 +                nb_kernel_list_add_kernels(kernellist_sse4_1_single, kernellist_sse4_1_single_size);
 +#endif
 +#if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA) && !(defined GMX_DOUBLE)
 +                nb_kernel_list_add_kernels(kernellist_avx_128_fma_single, kernellist_avx_128_fma_single_size);
 +#endif
 +#if (defined GMX_CPU_ACCELERATION_X86_AVX_256) && !(defined GMX_DOUBLE)
 +                nb_kernel_list_add_kernels(kernellist_avx_256_single, kernellist_avx_256_single_size);
 +#endif
 +                /* Double precision */
 +#if (defined GMX_CPU_ACCELERATION_X86_SSE2 && defined GMX_DOUBLE)
 +                nb_kernel_list_add_kernels(kernellist_sse2_double, kernellist_sse2_double_size);
 +#endif
 +#if (defined GMX_CPU_ACCELERATION_X86_SSE4_1 && defined GMX_DOUBLE)
 +                nb_kernel_list_add_kernels(kernellist_sse4_1_double, kernellist_sse4_1_double_size);
 +#endif
 +#if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA && defined GMX_DOUBLE)
 +                nb_kernel_list_add_kernels(kernellist_avx_128_fma_double, kernellist_avx_128_fma_double_size);
 +#endif
 +#if (defined GMX_CPU_ACCELERATION_X86_AVX_256 && defined GMX_DOUBLE)
 +                nb_kernel_list_add_kernels(kernellist_avx_256_double, kernellist_avx_256_double_size);
 +#endif
 +                ; /* empty statement to avoid a completely empty block when none of the #if sections above is compiled in */
 +            }
 +        }
 +        /* Create a hash for faster lookups; this runs even when no kernel list was added */
 +        nb_kernel_list_hash_init();
 +
 +        nonbonded_setup_done = TRUE;
 +    }
 +#ifdef GMX_THREAD_MPI
 +    tMPI_Thread_mutex_unlock(&nonbonded_setup_mutex);
 +#endif
 +}
 +
 +
 +
 +void /* Selects the kernels for one neighborlist: fills nl->kernelptr_vf, nl->kernelptr_f and nl->simd_padding_width */
 +gmx_nonbonded_set_kernel_pointers(FILE *log, t_nblist *nl)
 +{
 +    const char *     elec;
 +    const char *     elec_mod;
 +    const char *     vdw;
 +    const char *     vdw_mod;
 +    const char *     geom;
 +    const char *     other;
 +    const char *     vf; /* NOTE(review): not referenced anywhere below */
 +
 +    struct
 +    {
 +        const char *  arch;
 +        int           simd_padding_width;
 +    }
 +    arch_and_padding[] = /* searched front to back by the loops below; "c" is always the last entry */
 +    {
 +        /* Single precision */
 +#if (defined GMX_CPU_ACCELERATION_X86_AVX_256) && !(defined GMX_DOUBLE)
 +        { "avx_256_single", 8 },
 +#endif
 +#if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA) && !(defined GMX_DOUBLE)
 +        { "avx_128_fma_single", 4 },
 +#endif
 +#if (defined GMX_CPU_ACCELERATION_X86_SSE4_1) && !(defined GMX_DOUBLE)
 +        { "sse4_1_single", 4 },
 +#endif
 +#if (defined GMX_CPU_ACCELERATION_X86_SSE2) && !(defined GMX_DOUBLE)
 +        { "sse2_single", 4 },
 +#endif
 +        /* Double precision */
 +#if (defined GMX_CPU_ACCELERATION_X86_AVX_256 && defined GMX_DOUBLE)
 +        { "avx_256_double", 4 },
 +#endif
 +#if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA && defined GMX_DOUBLE)
 +        /* Sic. Double precision 2-way SIMD does not require neighbor list padding,
 +         * since the kernels execute a loop unrolled by a factor of 2, followed by
 +         * a possible single odd-element epilogue.
 +         */
 +        { "avx_128_fma_double", 1 },
 +#endif
 +#if (defined GMX_CPU_ACCELERATION_X86_SSE2 && defined GMX_DOUBLE)
 +        /* No padding - see comment above */
 +        { "sse2_double", 1 },
 +#endif
 +#if (defined GMX_CPU_ACCELERATION_X86_SSE4_1 && defined GMX_DOUBLE)
 +        /* No padding - see comment above */
 +        { "sse4_1_double", 1 },
 +#endif
 +        { "c", 1 },
 +    };
 +    int              narch = asize(arch_and_padding);
 +    int              i;
 +
 +    if (nonbonded_setup_done == FALSE)
 +    {
 +        /* We typically call this setup routine before starting timers,
 +         * but if that has not been done for whatever reason we do it now.
 +         */
 +        gmx_nonbonded_setup(NULL, NULL, FALSE);
 +    }
 +
 +    /* Not used yet */
 +    other = "";
 +
 +    nl->kernelptr_vf = NULL;
 +    nl->kernelptr_v  = NULL; /* never assigned again in this function, so it stays NULL */
 +    nl->kernelptr_f  = NULL;
 +
 +    elec     = gmx_nbkernel_elec_names[nl->ielec];
 +    elec_mod = eintmod_names[nl->ielecmod];
 +    vdw      = gmx_nbkernel_vdw_names[nl->ivdw];
 +    vdw_mod  = eintmod_names[nl->ivdwmod];
 +    geom     = gmx_nblist_geometry_names[nl->igeometry];
 +
 +    if (nl->type == GMX_NBLIST_INTERACTION_ADRESS)
 +    {
-         nl->kernelptr_vf       = gmx_nb_free_energy_kernel;
-         nl->kernelptr_f        = gmx_nb_free_energy_kernel;
++        nl->kernelptr_vf       = (void *) gmx_nb_generic_adress_kernel;
++        nl->kernelptr_f        = (void *) gmx_nb_generic_adress_kernel;
 +        nl->simd_padding_width = 1;
 +        return;
 +    }
 +
 +    if (nl->type == GMX_NBLIST_INTERACTION_FREE_ENERGY)
 +    {
-         nl->kernelptr_vf       = gmx_nb_generic_cg_kernel;
-         nl->kernelptr_f        = gmx_nb_generic_cg_kernel;
++        nl->kernelptr_vf       = (void *) gmx_nb_free_energy_kernel;
++        nl->kernelptr_f        = (void *) gmx_nb_free_energy_kernel;
 +        nl->simd_padding_width = 1;
 +    }
 +    else if (!gmx_strcasecmp_min(geom, "CG-CG")) /* presumably a 0 return means geom matches "CG-CG" - confirm in string2.h */
 +    {
-             nl->kernelptr_vf       = nb_kernel_list_findkernel(log, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "PotentialAndForce");
++        nl->kernelptr_vf       = (void *) gmx_nb_generic_cg_kernel;
++        nl->kernelptr_f        = (void *) gmx_nb_generic_cg_kernel;
 +        nl->simd_padding_width = 1;
 +    }
 +    else
 +    {
 +        /* Try to find a specific kernel first */
 +
 +        for (i = 0; i < narch && nl->kernelptr_vf == NULL; i++)
 +        {
-             nl->kernelptr_f        = nb_kernel_list_findkernel(log, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "Force");
++            nl->kernelptr_vf       = (void *) nb_kernel_list_findkernel(log, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "PotentialAndForce");
 +            nl->simd_padding_width = arch_and_padding[i].simd_padding_width; /* written on every iteration, whether or not a kernel was found */
 +        }
 +        for (i = 0; i < narch && nl->kernelptr_f == NULL; i++)
 +        {
-                 nl->kernelptr_f        = nb_kernel_list_findkernel(NULL, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "PotentialAndForce");
++            nl->kernelptr_f        = (void *) nb_kernel_list_findkernel(log, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "Force");
 +            nl->simd_padding_width = arch_and_padding[i].simd_padding_width; /* overwrites the width stored by the loop above */
 +
 +            /* If there is no force-only optimized kernel, is there a potential & force one? */
 +            if (nl->kernelptr_f == NULL)
 +            {
-             nl->kernelptr_vf       = gmx_nb_generic_kernel;
-             nl->kernelptr_f        = gmx_nb_generic_kernel;
++                nl->kernelptr_f        = (void *) nb_kernel_list_findkernel(NULL, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "PotentialAndForce");
 +                nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
 +            }
 +        }
 +
 +        /* Give up, pick a generic one instead (only kernelptr_vf is tested; both pointers are replaced) */
 +        if (nl->kernelptr_vf == NULL)
 +        {
++            nl->kernelptr_vf       = (void *) gmx_nb_generic_kernel;
++            nl->kernelptr_f        = (void *) gmx_nb_generic_kernel;
 +            nl->simd_padding_width = 1;
 +            if (debug)
 +            {
 +                fprintf(debug,
 +                        "WARNING - Slow generic NB kernel used for neighborlist with\n"
 +                        "    Elec: '%s', Modifier: '%s'\n"
 +                        "    Vdw:  '%s', Modifier: '%s'\n"
 +                        "    Geom: '%s', Other: '%s'\n\n",
 +                        elec, elec_mod, vdw, vdw_mod, geom, other);
 +            }
 +        }
 +    }
 +
 +    return;
 +}
 +
 +void do_nonbonded(t_commrec *cr, t_forcerec *fr, /* runs the selected kernel on each non-empty neighborlist; cr is not referenced in this function */
 +                  rvec x[], rvec f_shortrange[], rvec f_longrange[], t_mdatoms *mdatoms, t_blocka *excl,
 +                  gmx_grppairener_t *grppener, rvec box_size, /* box_size is not referenced in this function */
 +                  t_nrnb *nrnb, real *lambda, real *dvdl,
 +                  int nls, int eNL, int flags) /* nls < 0: all fr->nnblists list sets; eNL < 0: all eNL_NR list types */
 +{
 +    t_nblist *        nlist;
 +    int               n, n0, n1, i, i0, i1, sz, range; /* NOTE(review): sz is not referenced below */
 +    t_nblists *       nblists;
 +    nb_kernel_data_t  kernel_data;
 +    nb_kernel_t *     kernelptr = NULL;
 +    rvec *            f;
 +
 +    kernel_data.flags                   = flags;
 +    kernel_data.exclusions              = excl;
 +    kernel_data.lambda                  = lambda;
 +    kernel_data.dvdl                    = dvdl;
 +
 +    if (fr->bAllvsAll) /* nothing is computed by this routine when bAllvsAll is set */
 +    {
 +        return;
 +    }
 +
 +    if (eNL >= 0)
 +    {
 +        i0 = eNL;
 +        i1 = i0+1;
 +    }
 +    else
 +    {
 +        i0 = 0;
 +        i1 = eNL_NR;
 +    }
 +
 +    if (nls >= 0)
 +    {
 +        n0 = nls;
 +        n1 = nls+1;
 +    }
 +    else
 +    {
 +        n0 = 0;
 +        n1 = fr->nnblists;
 +    }
 +
 +    for (n = n0; (n < n1); n++)
 +    {
 +        nblists = &fr->nblists[n];
 +
 +        kernel_data.table_elec              = &nblists->table_elec;
 +        kernel_data.table_vdw               = &nblists->table_vdw;
 +        kernel_data.table_elec_vdw          = &nblists->table_elec_vdw;
 +
 +        for (range = 0; range < 2; range++) /* 0: short-range lists, 1: long-range lists */
 +        {
 +            /* Are we doing short/long-range? */
 +            if (range == 0)
 +            {
 +                /* Short-range */
 +                if (!(flags & GMX_NONBONDED_DO_SR))
 +                {
 +                    continue;
 +                }
 +                kernel_data.energygrp_elec          = grppener->ener[egCOULSR];
 +                kernel_data.energygrp_vdw           = grppener->ener[fr->bBHAM ? egBHAMSR : egLJSR];
 +                kernel_data.energygrp_polarization  = grppener->ener[egGB];
 +                nlist = nblists->nlist_sr;
 +                f                                   = f_shortrange;
 +            }
 +            else if (range == 1)
 +            {
 +                /* Long-range */
 +                if (!(flags & GMX_NONBONDED_DO_LR))
 +                {
 +                    continue;
 +                }
 +                kernel_data.energygrp_elec          = grppener->ener[egCOULLR];
 +                kernel_data.energygrp_vdw           = grppener->ener[fr->bBHAM ? egBHAMLR : egLJLR];
 +                kernel_data.energygrp_polarization  = grppener->ener[egGB];
 +                nlist = nblists->nlist_lr;
 +                f                                   = f_longrange;
 +            }
 +
 +            for (i = i0; (i < i1); i++)
 +            {
 +                if (nlist[i].nri > 0)
 +                {
 +                    if (flags & GMX_NONBONDED_DO_POTENTIAL)
 +                    {
 +                        /* Potential and force */
 +                        kernelptr = (nb_kernel_t *)nlist[i].kernelptr_vf;
 +                    }
 +                    else
 +                    {
 +                        /* Force only, no potential */
 +                        kernelptr = (nb_kernel_t *)nlist[i].kernelptr_f;
 +                    }
 +
 +                    if (nlist[i].type != GMX_NBLIST_INTERACTION_FREE_ENERGY && (flags & GMX_NONBONDED_DO_FOREIGNLAMBDA))
 +                    {
 +                        /* For foreign-lambda evaluation we don't need the non-perturbed interactions */
 +                        continue;
 +                    }
 +                    (*kernelptr)(&(nlist[i]), x, f, fr, mdatoms, &kernel_data, nrnb);
 +                }
 +            }
 +        }
 +    }
 +}
 +
 +static void /* Reports a listed pair at distance r beyond the table limit rlimit: gmx_warning(), plus a coordinate dump when debug is set */
 +nb_listed_warning_rlimit(const rvec *x, int ai, int aj, int * global_atom_index, real r, real rlimit)
 +{
 +    gmx_warning("Listed nonbonded interaction between particles %d and %d\n"
 +                "at distance %.3f which is larger than the table limit %.3f nm.\n\n"
 +                "This is likely either a 1,4 interaction, or a listed interaction inside\n"
 +                "a smaller molecule you are decoupling during a free energy calculation.\n"
 +                "Since interactions at distances beyond the table cannot be computed,\n"
 +                "they are skipped until they are inside the table limit again. You will\n"
 +                "only see this message once, even if it occurs for several interactions.\n\n"
 +                "IMPORTANT: This should not happen in a stable simulation, so there is\n"
 +                "probably something wrong with your system. Only change the table-extension\n"
 +                "distance in the mdp file if you are really sure that is the reason.\n",
 +                glatnr(global_atom_index, ai), glatnr(global_atom_index, aj), r, rlimit); /* atom numbers are mapped through glatnr() for reporting */
 +
 +    if (debug)
 +    {
 +        fprintf(debug,
 +                "%8f %8f %8f\n%8f %8f %8f\n1-4 (%d,%d) interaction not within cut-off! r=%g. Ignored\n",
 +                x[ai][XX], x[ai][YY], x[ai][ZZ], x[aj][XX], x[aj][YY], x[aj][ZZ], /* coordinates are indexed with the local ai/aj */
 +                glatnr(global_atom_index, ai), glatnr(global_atom_index, aj), r);
 +    }
 +}
 +
 +
 +
 +/* Evaluates one tabulated pair interaction: stores the energy terms in *velec and *vvdw and
 + * returns fscal. This might logically belong better in the nb_generic.c module, but it is only used in
 + * do_nonbonded_listed(), and we want it inlined there to avoid an extra function call for every single pair listed in the topology.
 + */
 +static real
 +nb_evaluate_single(real r2, real tabscale, real *vftab,
 +                   real qq, real c6, real c12, real *velec, real *vvdw)
 +{
 +    real       rinv, r, rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VVe, FFe, VVd, FFd, VVr, FFr, fscal;
 +    int        ntab;
 +
 +    /* Do the tabulated interactions - first table lookup */
 +    rinv             = gmx_invsqrt(r2);
 +    r                = r2*rinv;
 +    rtab             = r*tabscale;
 +    ntab             = rtab; /* implicit real-to-int conversion truncates rtab to the table point index */
 +    eps              = rtab-ntab; /* fractional position between table points */
 +    eps2             = eps*eps;
 +    ntab             = 12*ntab; /* 12 reals per table point: 4 each for electrostatics, dispersion and repulsion */
 +    /* Electrostatics */
 +    Y                = vftab[ntab];
 +    F                = vftab[ntab+1];
 +    Geps             = eps*vftab[ntab+2];
 +    Heps2            = eps2*vftab[ntab+3];
 +    Fp               = F+Geps+Heps2;
 +    VVe              = Y+eps*Fp;
 +    FFe              = Fp+Geps+2.0*Heps2;
 +    /* Dispersion */
 +    Y                = vftab[ntab+4];
 +    F                = vftab[ntab+5];
 +    Geps             = eps*vftab[ntab+6];
 +    Heps2            = eps2*vftab[ntab+7];
 +    Fp               = F+Geps+Heps2;
 +    VVd              = Y+eps*Fp;
 +    FFd              = Fp+Geps+2.0*Heps2;
 +    /* Repulsion */
 +    Y                = vftab[ntab+8];
 +    F                = vftab[ntab+9];
 +    Geps             = eps*vftab[ntab+10];
 +    Heps2            = eps2*vftab[ntab+11];
 +    Fp               = F+Geps+Heps2;
 +    VVr              = Y+eps*Fp;
 +    FFr              = Fp+Geps+2.0*Heps2;
 +
 +    *velec           = qq*VVe;
 +    *vvdw            = c6*VVd+c12*VVr;
 +
 +    fscal            = -(qq*FFe+c6*FFd+c12*FFr)*tabscale*rinv; /* the caller multiplies this into the distance vector dx */
 +
 +    return fscal;
 +}
 +
 +
 +real /* Evaluates the listed pair interactions in iatoms[] from the fr->tab14 table, adding to f[], fshift[] and grppener */
 +do_nonbonded_listed(int ftype, int nbonds, /* nbonds counts iatoms[] entries, three per pair (see the loop below) */
 +                    const t_iatom iatoms[], const t_iparams iparams[],
 +                    const rvec x[], rvec f[], rvec fshift[],
 +                    const t_pbc *pbc, const t_graph *g,
 +                    real *lambda, real *dvdl,
 +                    const t_mdatoms *md,
 +                    const t_forcerec *fr, gmx_grppairener_t *grppener,
 +                    int *global_atom_index)
 +{
 +    int              ielec, ivdw; /* NOTE(review): neither is referenced below */
 +    real             qq, c6, c12;
 +    rvec             dx;
 +    ivec             dt;
 +    int              i, j, itype, ai, aj, gid; /* NOTE(review): j is not referenced below */
 +    int              fshift_index;
 +    real             r2, rinv; /* NOTE(review): rinv is not referenced below */
 +    real             fscal, velec, vvdw;
 +    real *           energygrp_elec;
 +    real *           energygrp_vdw;
 +    static gmx_bool  warned_rlimit = FALSE; /* static flag so the table-limit warning is issued only once */
 +    /* Free energy stuff */
 +    gmx_bool         bFreeEnergy;
 +    real             LFC[2], LFV[2], DLF[2], lfac_coul[2], lfac_vdw[2], dlfac_coul[2], dlfac_vdw[2];
 +    real             qqB, c6B, c12B, sigma2_def, sigma2_min;
 +
 +
 +    switch (ftype)
 +    {
 +        case F_LJ14:
 +        case F_LJC14_Q:
 +            energygrp_elec = grppener->ener[egCOUL14];
 +            energygrp_vdw  = grppener->ener[egLJ14];
 +            break;
 +        case F_LJC_PAIRS_NB:
 +            energygrp_elec = grppener->ener[egCOULSR];
 +            energygrp_vdw  = grppener->ener[egLJSR];
 +            break;
 +        default:
 +            energygrp_elec = NULL; /* Keep compiler happy */
 +            energygrp_vdw  = NULL; /* Keep compiler happy */
 +            gmx_fatal(FARGS, "Unknown function type %d in do_nonbonded14", ftype); /* NOTE(review): the message names do_nonbonded14, not do_nonbonded_listed */
 +            break;
 +    }
 +
 +    if (fr->efep != efepNO)
 +    {
 +        /* Lambda factor for state A=1-lambda and B=lambda */
 +        LFC[0] = 1.0 - lambda[efptCOUL];
 +        LFV[0] = 1.0 - lambda[efptVDW];
 +        LFC[1] = lambda[efptCOUL];
 +        LFV[1] = lambda[efptVDW];
 +
 +        /* Derivative of the lambda factor for states A and B */
 +        DLF[0] = -1;
 +        DLF[1] = 1;
 +
 +        /* Precalculate sigma^2 as the cube root of the sigma^6 values */
 +        sigma2_def = pow(fr->sc_sigma6_def, 1.0/3.0);
 +        sigma2_min = pow(fr->sc_sigma6_min, 1.0/3.0);
 +
 +        for (i = 0; i < 2; i++)
 +        {
 +            lfac_coul[i]  = (fr->sc_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
 +            dlfac_coul[i] = DLF[i]*fr->sc_power/fr->sc_r_power*(fr->sc_power == 2 ? (1-LFC[i]) : 1);
 +            lfac_vdw[i]   = (fr->sc_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
 +            dlfac_vdw[i]  = DLF[i]*fr->sc_power/fr->sc_r_power*(fr->sc_power == 2 ? (1-LFV[i]) : 1);
 +        }
 +    }
 +    else
 +    {
 +        sigma2_min = sigma2_def = 0;
 +    }
 +
 +    bFreeEnergy = FALSE; /* only the F_LJ14 case below can set this to TRUE */
 +    for (i = 0; (i < nbonds); ) /* i advances by three per pair: parameter type, ai, aj */
 +    {
 +        itype = iatoms[i++];
 +        ai    = iatoms[i++];
 +        aj    = iatoms[i++];
 +        gid   = GID(md->cENER[ai], md->cENER[aj], md->nenergrp);
 +
 +        /* Get parameters */
 +        switch (ftype)
 +        {
 +            case F_LJ14:
 +                bFreeEnergy =
 +                    (fr->efep != efepNO &&
 +                     ((md->nPerturbed && (md->bPerturbed[ai] || md->bPerturbed[aj])) ||
 +                      iparams[itype].lj14.c6A != iparams[itype].lj14.c6B ||
 +                      iparams[itype].lj14.c12A != iparams[itype].lj14.c12B));
 +                qq               = md->chargeA[ai]*md->chargeA[aj]*fr->epsfac*fr->fudgeQQ;
 +                c6               = iparams[itype].lj14.c6A;
 +                c12              = iparams[itype].lj14.c12A;
 +                break;
 +            case F_LJC14_Q:
 +                qq               = iparams[itype].ljc14.qi*iparams[itype].ljc14.qj*fr->epsfac*iparams[itype].ljc14.fqq;
 +                c6               = iparams[itype].ljc14.c6;
 +                c12              = iparams[itype].ljc14.c12;
 +                break;
 +            case F_LJC_PAIRS_NB:
 +                qq               = iparams[itype].ljcnb.qi*iparams[itype].ljcnb.qj*fr->epsfac;
 +                c6               = iparams[itype].ljcnb.c6;
 +                c12              = iparams[itype].ljcnb.c12;
 +                break;
 +            default:
 +                /* Cannot happen since we called gmx_fatal() above in this case */
 +                qq = c6 = c12 = 0; /* Keep compiler happy */
 +                break;
 +        }
 +
 +        /* To save flops in the optimized kernels, c6/c12 have 6.0/12.0 derivative prefactors
 +         * included in the general nfbp array now. This means the tables are scaled down by the
 +         * same factor, so when we use the original c6/c12 parameters from iparams[] they must
 +         * be scaled up.
 +         */
 +        c6  *= 6.0;
 +        c12 *= 12.0;
 +
 +        /* Do we need to apply full periodic boundary conditions? */
 +        if (fr->bMolPBC == TRUE)
 +        {
 +            fshift_index = pbc_dx_aiuc(pbc, x[ai], x[aj], dx);
 +        }
 +        else
 +        {
 +            fshift_index = CENTRAL;
 +            rvec_sub(x[ai], x[aj], dx);
 +        }
 +        r2           = norm2(dx);
 +
 +        if (r2 >= fr->tab14.r*fr->tab14.r)
 +        {
 +            if (warned_rlimit == FALSE)
 +            {
 +                nb_listed_warning_rlimit(x, ai, aj, global_atom_index, sqrt(r2), fr->tab14.r);
 +                warned_rlimit = TRUE;
 +            }
 +            continue; /* pair is skipped: no energy or force contribution is added */
 +        }
 +
 +        if (bFreeEnergy)
 +        {
 +            /* Currently free energy is only supported for F_LJ14, so no need to check for that if we got here */
 +            qqB              = md->chargeB[ai]*md->chargeB[aj]*fr->epsfac*fr->fudgeQQ;
 +            c6B              = iparams[itype].lj14.c6B*6.0;
 +            c12B             = iparams[itype].lj14.c12B*12.0;
 +
 +            fscal            = nb_free_energy_evaluate_single(r2, fr->sc_r_power, fr->sc_alphacoul, fr->sc_alphavdw,
 +                                                              fr->tab14.scale, fr->tab14.data, qq, c6, c12, qqB, c6B, c12B,
 +                                                              LFC, LFV, DLF, lfac_coul, lfac_vdw, dlfac_coul, dlfac_vdw,
 +                                                              fr->sc_sigma6_def, fr->sc_sigma6_min, sigma2_def, sigma2_min, &velec, &vvdw, dvdl);
 +        }
 +        else
 +        {
 +            /* Evaluate tabulated interaction without free energy */
 +            fscal            = nb_evaluate_single(r2, fr->tab14.scale, fr->tab14.data, qq, c6, c12, &velec, &vvdw);
 +        }
 +
 +        energygrp_elec[gid]  += velec;
 +        energygrp_vdw[gid]   += vvdw;
 +        svmul(fscal, dx, dx); /* dx is reused to hold fscal*dx, the vector added to the forces below */
 +
 +        /* Add the forces */
 +        rvec_inc(f[ai], dx);
 +        rvec_dec(f[aj], dx);
 +
 +        if (g)
 +        {
 +            /* Correct the shift forces using the graph; this replaces the fshift_index found above */
 +            ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
 +            fshift_index = IVEC2IS(dt);
 +        }
 +        if (fshift_index != CENTRAL)
 +        {
 +            rvec_inc(fshift[fshift_index], dx);
 +            rvec_dec(fshift[CENTRAL], dx);
 +        }
 +    }
 +    return 0.0; /* always 0.0; the energies are accumulated in the grppener arrays instead */
 +}
index 2bcc288e5d9708a9bddbddb4e700ff906d409b34,0000000000000000000000000000000000000000..4ec13d321e3ad13183aeef39fe667d25ef7108de
mode 100644,000000..100644
--- /dev/null
@@@ -1,3866 -1,0 +1,3866 @@@
-         /* check validity of options */
-         if (fep->n_lambda > 0 && ir->rlist < max(ir->rvdw, ir->rcoulomb))
-         {
-             sprintf(warn_buf,
-                     "For foreign lambda free energy differences it is assumed that the soft-core interactions have no effect beyond the neighborlist cut-off");
-             warning(wi, warn_buf);
-         }
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + *          GROningen MAchine for Chemical Simulations
 + *
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + *
 + * For more info, check our website at http://www.gromacs.org
 + *
 + * And Hey:
 + * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <ctype.h>
 +#include <stdlib.h>
 +#include <limits.h>
 +#include "sysstuff.h"
 +#include "smalloc.h"
 +#include "typedefs.h"
 +#include "physics.h"
 +#include "names.h"
 +#include "gmx_fatal.h"
 +#include "macros.h"
 +#include "index.h"
 +#include "symtab.h"
 +#include "string2.h"
 +#include "readinp.h"
 +#include "warninp.h"
 +#include "readir.h"
 +#include "toputil.h"
 +#include "index.h"
 +#include "network.h"
 +#include "vec.h"
 +#include "pbc.h"
 +#include "mtop_util.h"
 +#include "chargegroup.h"
 +#include "inputrec.h"
 +
 +/* Compile-time limits. The code that uses them is outside this excerpt.
 + * NOTE(review): the names suggest that MAXPTR bounds a number of parsed
 + * string pointers, NOGID marks "no group id" and MAXLAMBDAS bounds the
 + * number of lambda values -- confirm against the use sites.
 + */
 +#define MAXPTR 254
 +#define NOGID  255
 +#define MAXLAMBDAS 1024
 +
 +/* Resource parameters
 + * Do not change any of these until you read the instruction
 + * in readinp.h. Some cpp's do not take spaces after the backslash
 + * (like the c-shell), which will give you a very weird compiler
 + * message.
 + */
 +
 +/* File-scope text buffers, each STRLEN characters long (fep_lambda holds
 + * one such buffer per efpt* entry; pull_grp and rot_grp are pointers to
 + * string arrays that are set up elsewhere).
 + * NOTE(review): presumably each buffer receives the raw text of the
 + * identically named input option before it is parsed -- the code that
 + * fills and reads them is outside this excerpt, confirm there.
 + */
 +static char tcgrps[STRLEN], tau_t[STRLEN], ref_t[STRLEN],
 +            acc[STRLEN], accgrps[STRLEN], freeze[STRLEN], frdim[STRLEN],
 +            energy[STRLEN], user1[STRLEN], user2[STRLEN], vcm[STRLEN], xtc_grps[STRLEN],
 +            couple_moltype[STRLEN], orirefitgrp[STRLEN], egptable[STRLEN], egpexcl[STRLEN],
 +            wall_atomtype[STRLEN], wall_density[STRLEN], deform[STRLEN], QMMM[STRLEN];
 +static char   fep_lambda[efptNR][STRLEN];
 +static char   lambda_weights[STRLEN];
 +static char **pull_grp;
 +static char **rot_grp;
 +static char   anneal[STRLEN], anneal_npoints[STRLEN],
 +              anneal_time[STRLEN], anneal_temp[STRLEN];
 +static char   QMmethod[STRLEN], QMbasis[STRLEN], QMcharge[STRLEN], QMmult[STRLEN],
 +              bSH[STRLEN], CASorbitals[STRLEN], CASelectrons[STRLEN], SAon[STRLEN],
 +              SAoff[STRLEN], SAsteps[STRLEN], bTS[STRLEN], bOPT[STRLEN];
 +static char efield_x[STRLEN], efield_xt[STRLEN], efield_y[STRLEN],
 +            efield_yt[STRLEN], efield_z[STRLEN], efield_zt[STRLEN];
 +
 +/* Group-coverage policies: each value states whether every particle must
 + * belong to a selected group and, if not, how the particles left over are
 + * collected into a rest group.
 + */
 +enum {
 +    egrptpALL,         /* All particles have to be a member of a group.     */
 +    egrptpALL_GENREST, /* A rest group with name is generated for particles *
 +                        * that are not part of any group.                   */
 +    egrptpPART,        /* As egrptpALL_GENREST, but no name is generated    *
 +                        * for the rest group.                               */
 +    egrptpONE          /* Merge all selected groups into one group,         *
 +                        * make a rest group for the remaining particles.    */
 +};
 +
 +
 +/* Allocate the members of opts and ir that are set up through snew.
 + *
 + * opts->include and opts->define each get STRLEN elements;
 + * ir->fepvals, ir->expandedvals and ir->simtempvals each get one element.
 + * NOTE(review): snew is declared outside this excerpt; presumably it
 + * returns zero-initialised storage and aborts on failure -- confirm.
 + */
 +void init_ir(t_inputrec *ir, t_gromppopts *opts)
 +{
 +    snew(opts->include, STRLEN);
 +    snew(opts->define, STRLEN);
 +    snew(ir->fepvals, 1);
 +    snew(ir->expandedvals, 1);
 +    snew(ir->simtempvals, 1);
 +}
 +
 +static void GetSimTemps(int ntemps, t_simtemp *simtemp, double *temperature_lambdas)
 +{
 +
 +    int i;
 +
 +    for (i = 0; i < ntemps; i++)
 +    {
 +        /* simple linear scaling -- allows more control */
 +        if (simtemp->eSimTempScale == esimtempLINEAR)
 +        {
 +            simtemp->temperatures[i] = simtemp->simtemp_low + (simtemp->simtemp_high-simtemp->simtemp_low)*temperature_lambdas[i];
 +        }
 +        else if (simtemp->eSimTempScale == esimtempGEOMETRIC)  /* should give roughly equal acceptance for constant heat capacity . . . */
 +        {
 +            simtemp->temperatures[i] = simtemp->simtemp_low * pow(simtemp->simtemp_high/simtemp->simtemp_low, (1.0*i)/(ntemps-1));
 +        }
 +        else if (simtemp->eSimTempScale == esimtempEXPONENTIAL)
 +        {
 +            simtemp->temperatures[i] = simtemp->simtemp_low + (simtemp->simtemp_high-simtemp->simtemp_low)*((exp(temperature_lambdas[i])-1)/(exp(1.0)-1));
 +        }
 +        else
 +        {
 +            char errorstr[128];
 +            sprintf(errorstr, "eSimTempScale=%d not defined", simtemp->eSimTempScale);
 +            gmx_fatal(FARGS, errorstr);
 +        }
 +    }
 +}
 +
 +
 +
 +static void _low_check(gmx_bool b, char *s, warninp_t wi)
 +{
 +    if (b)
 +    {
 +        warning_error(wi, s);
 +    }
 +}
 +
 +static void check_nst(const char *desc_nst, int nst,
 +                      const char *desc_p, int *p,
 +                      warninp_t wi)
 +{
 +    char buf[STRLEN];
 +
 +    if (*p > 0 && *p % nst != 0)
 +    {
 +        /* Round up to the next multiple of nst */
 +        *p = ((*p)/nst + 1)*nst;
 +        sprintf(buf, "%s should be a multiple of %s, changing %s to %d\n",
 +                desc_p, desc_nst, desc_p, *p);
 +        warning(wi, buf);
 +    }
 +}
 +
 +static gmx_bool ir_NVE(const t_inputrec *ir)
 +{
 +    return ((ir->eI == eiMD || EI_VV(ir->eI)) && ir->etc == etcNO);
 +}
 +
 +static int lcd(int n1, int n2)
 +{
 +    int d, i;
 +
 +    d = 1;
 +    for (i = 2; (i <= n1 && i <= n2); i++)
 +    {
 +        if (n1 % i == 0 && n2 % i == 0)
 +        {
 +            d = i;
 +        }
 +    }
 +
 +    return d;
 +}
 +
 +static void process_interaction_modifier(const t_inputrec *ir, int *eintmod)
 +{
 +    if (*eintmod == eintmodPOTSHIFT_VERLET)
 +    {
 +        if (ir->cutoff_scheme == ecutsVERLET)
 +        {
 +            *eintmod = eintmodPOTSHIFT;
 +        }
 +        else
 +        {
 +            *eintmod = eintmodNONE;
 +        }
 +    }
 +}
 +
 +void check_ir(const char *mdparin, t_inputrec *ir, t_gromppopts *opts,
 +              warninp_t wi)
 +/* Check internal consistency */
 +{
 +    /* Strange macro: first one fills the err_buf, and then one can check
 +     * the condition, which will print the message and increase the error
 +     * counter.
 +     */
 +#define CHECK(b) _low_check(b, err_buf, wi)
 +    char        err_buf[256], warn_buf[STRLEN];
 +    int         i, j;
 +    int         ns_type  = 0;
 +    real        dt_coupl = 0;
 +    real        dt_pcoupl;
 +    int         nstcmin;
 +    t_lambda   *fep    = ir->fepvals;
 +    t_expanded *expand = ir->expandedvals;
 +
 +    set_warning_line(wi, mdparin, -1);
 +
 +    /* BASIC CUT-OFF STUFF */
 +    if (ir->rcoulomb < 0)
 +    {
 +        warning_error(wi, "rcoulomb should be >= 0");
 +    }
 +    if (ir->rvdw < 0)
 +    {
 +        warning_error(wi, "rvdw should be >= 0");
 +    }
 +    if (ir->rlist < 0 &&
 +        !(ir->cutoff_scheme == ecutsVERLET && ir->verletbuf_drift > 0))
 +    {
 +        warning_error(wi, "rlist should be >= 0");
 +    }
 +
 +    process_interaction_modifier(ir, &ir->coulomb_modifier);
 +    process_interaction_modifier(ir, &ir->vdw_modifier);
 +
 +    if (ir->cutoff_scheme == ecutsGROUP)
 +    {
 +        /* BASIC CUT-OFF STUFF */
 +        if (ir->rlist == 0 ||
 +            !((EEL_MIGHT_BE_ZERO_AT_CUTOFF(ir->coulombtype) && ir->rcoulomb > ir->rlist) ||
 +              (EVDW_MIGHT_BE_ZERO_AT_CUTOFF(ir->vdwtype)    && ir->rvdw     > ir->rlist)))
 +        {
 +            /* No switched potential and/or no twin-range:
 +             * we can set the long-range cut-off to the maximum of the other cut-offs.
 +             */
 +            ir->rlistlong = max_cutoff(ir->rlist, max_cutoff(ir->rvdw, ir->rcoulomb));
 +        }
 +        else if (ir->rlistlong < 0)
 +        {
 +            ir->rlistlong = max_cutoff(ir->rlist, max_cutoff(ir->rvdw, ir->rcoulomb));
 +            sprintf(warn_buf, "rlistlong was not set, setting it to %g (no buffer)",
 +                    ir->rlistlong);
 +            warning(wi, warn_buf);
 +        }
 +        if (ir->rlistlong == 0 && ir->ePBC != epbcNONE)
 +        {
 +            warning_error(wi, "Can not have an infinite cut-off with PBC");
 +        }
 +        if (ir->rlistlong > 0 && (ir->rlist == 0 || ir->rlistlong < ir->rlist))
 +        {
 +            warning_error(wi, "rlistlong can not be shorter than rlist");
 +        }
 +        if (IR_TWINRANGE(*ir) && ir->nstlist <= 0)
 +        {
 +            warning_error(wi, "Can not have nstlist<=0 with twin-range interactions");
 +        }
 +    }
 +
 +    if (ir->rlistlong == ir->rlist)
 +    {
 +        ir->nstcalclr = 0;
 +    }
 +    else if (ir->rlistlong > ir->rlist && ir->nstcalclr == 0)
 +    {
 +        warning_error(wi, "With different cutoffs for electrostatics and VdW, nstcalclr must be -1 or a positive number");
 +    }
 +
 +    if (ir->cutoff_scheme == ecutsVERLET)
 +    {
 +        real rc_max;
 +
 +        /* Normal Verlet type neighbor-list, currently only limited feature support */
 +        if (inputrec2nboundeddim(ir) < 3)
 +        {
 +            warning_error(wi, "With Verlet lists only full pbc or pbc=xy with walls is supported");
 +        }
 +        if (ir->rcoulomb != ir->rvdw)
 +        {
 +            warning_error(wi, "With Verlet lists rcoulomb!=rvdw is not supported");
 +        }
 +        if (ir->vdwtype != evdwCUT)
 +        {
 +            warning_error(wi, "With Verlet lists only cut-off LJ interactions are supported");
 +        }
 +        if (!(ir->coulombtype == eelCUT ||
 +              (EEL_RF(ir->coulombtype) && ir->coulombtype != eelRF_NEC) ||
 +              EEL_PME(ir->coulombtype) || ir->coulombtype == eelEWALD))
 +        {
 +            warning_error(wi, "With Verlet lists only cut-off, reaction-field, PME and Ewald electrostatics are supported");
 +        }
 +
 +        if (ir->nstlist <= 0)
 +        {
 +            warning_error(wi, "With Verlet lists nstlist should be larger than 0");
 +        }
 +
 +        if (ir->nstlist < 10)
 +        {
 +            warning_note(wi, "With Verlet lists the optimal nstlist is >= 10, with GPUs >= 20. Note that with the Verlet scheme, nstlist has no effect on the accuracy of your simulation.");
 +        }
 +
 +        rc_max = max(ir->rvdw, ir->rcoulomb);
 +
 +        if (ir->verletbuf_drift <= 0)
 +        {
 +            if (ir->verletbuf_drift == 0)
 +            {
 +                warning_error(wi, "Can not have an energy drift of exactly 0");
 +            }
 +
 +            if (ir->rlist < rc_max)
 +            {
 +                warning_error(wi, "With verlet lists rlist can not be smaller than rvdw or rcoulomb");
 +            }
 +
 +            if (ir->rlist == rc_max && ir->nstlist > 1)
 +            {
 +                warning_note(wi, "rlist is equal to rvdw and/or rcoulomb: there is no explicit Verlet buffer. The cluster pair list does have a buffering effect, but choosing a larger rlist might be necessary for good energy conservation.");
 +            }
 +        }
 +        else
 +        {
 +            if (ir->rlist > rc_max)
 +            {
 +                warning_note(wi, "You have set rlist larger than the interaction cut-off, but you also have verlet-buffer-drift > 0. Will set rlist using verlet-buffer-drift.");
 +            }
 +
 +            if (ir->nstlist == 1)
 +            {
 +                /* No buffer required */
 +                ir->rlist = rc_max;
 +            }
 +            else
 +            {
 +                if (EI_DYNAMICS(ir->eI))
 +                {
 +                    if (EI_MD(ir->eI) && ir->etc == etcNO)
 +                    {
 +                        warning_error(wi, "Temperature coupling is required for calculating rlist using the energy drift with verlet-buffer-drift > 0. Either use temperature coupling or set rlist yourself together with verlet-buffer-drift = -1.");
 +                    }
 +
 +                    if (inputrec2nboundeddim(ir) < 3)
 +                    {
 +                        warning_error(wi, "The box volume is required for calculating rlist from the energy drift with verlet-buffer-drift > 0. You are using at least one unbounded dimension, so no volume can be computed. Either use a finite box, or set rlist yourself together with verlet-buffer-drift = -1.");
 +                    }
 +                    /* Set rlist temporarily so we can continue processing */
 +                    ir->rlist = rc_max;
 +                }
 +                else
 +                {
 +                    /* Set the buffer to 5% of the cut-off */
 +                    ir->rlist = 1.05*rc_max;
 +                }
 +            }
 +        }
 +
 +        /* No twin-range calculations with Verlet lists */
 +        ir->rlistlong = ir->rlist;
 +    }
 +
 +    if (ir->nstcalclr == -1)
 +    {
 +        /* if rlist=rlistlong, this will later be changed to nstcalclr=0 */
 +        ir->nstcalclr = ir->nstlist;
 +    }
 +    else if (ir->nstcalclr > 0)
 +    {
 +        if (ir->nstlist > 0 && (ir->nstlist % ir->nstcalclr != 0))
 +        {
 +            warning_error(wi, "nstlist must be evenly divisible by nstcalclr. Use nstcalclr = -1 to automatically follow nstlist");
 +        }
 +    }
 +    else if (ir->nstcalclr < -1)
 +    {
 +        warning_error(wi, "nstcalclr must be a positive number (divisor of nstcalclr), or -1 to follow nstlist.");
 +    }
 +
 +    if (EEL_PME(ir->coulombtype) && ir->rcoulomb > ir->rvdw && ir->nstcalclr > 1)
 +    {
 +        warning_error(wi, "When used with PME, the long-range component of twin-range interactions must be updated every step (nstcalclr)");
 +    }
 +
 +    /* GENERAL INTEGRATOR STUFF */
 +    if (!(ir->eI == eiMD || EI_VV(ir->eI)))
 +    {
 +        ir->etc = etcNO;
 +    }
 +    if (ir->eI == eiVVAK)
 +    {
 +        sprintf(warn_buf, "Integrator method %s is implemented primarily for validation purposes; for molecular dynamics, you should probably be using %s or %s", ei_names[eiVVAK], ei_names[eiMD], ei_names[eiVV]);
 +        warning_note(wi, warn_buf);
 +    }
 +    if (!EI_DYNAMICS(ir->eI))
 +    {
 +        ir->epc = epcNO;
 +    }
 +    if (EI_DYNAMICS(ir->eI))
 +    {
 +        if (ir->nstcalcenergy < 0)
 +        {
 +            ir->nstcalcenergy = ir_optimal_nstcalcenergy(ir);
 +            if (ir->nstenergy != 0 && ir->nstenergy < ir->nstcalcenergy)
 +            {
 +                /* nstcalcenergy larger than nstener does not make sense.
 +                 * We ideally want nstcalcenergy=nstener.
 +                 */
 +                if (ir->nstlist > 0)
 +                {
 +                    ir->nstcalcenergy = lcd(ir->nstenergy, ir->nstlist);
 +                }
 +                else
 +                {
 +                    ir->nstcalcenergy = ir->nstenergy;
 +                }
 +            }
 +        }
 +        else if ( (ir->nstenergy > 0 && ir->nstcalcenergy > ir->nstenergy) ||
 +                  (ir->efep != efepNO && ir->fepvals->nstdhdl > 0 &&
 +                   (ir->nstcalcenergy > ir->fepvals->nstdhdl) ) )
 +
 +        {
 +            const char *nsten    = "nstenergy";
 +            const char *nstdh    = "nstdhdl";
 +            const char *min_name = nsten;
 +            int         min_nst  = ir->nstenergy;
 +
 +            /* find the smallest of ( nstenergy, nstdhdl ) */
 +            if (ir->efep != efepNO && ir->fepvals->nstdhdl > 0 &&
 +                (ir->fepvals->nstdhdl < ir->nstenergy) )
 +            {
 +                min_nst  = ir->fepvals->nstdhdl;
 +                min_name = nstdh;
 +            }
 +            /* If the user sets nstenergy small, we should respect that */
 +            sprintf(warn_buf,
 +                    "Setting nstcalcenergy (%d) equal to %s (%d)",
 +                    ir->nstcalcenergy, min_name, min_nst);
 +            warning_note(wi, warn_buf);
 +            ir->nstcalcenergy = min_nst;
 +        }
 +
 +        if (ir->epc != epcNO)
 +        {
 +            if (ir->nstpcouple < 0)
 +            {
 +                ir->nstpcouple = ir_optimal_nstpcouple(ir);
 +            }
 +        }
 +        if (IR_TWINRANGE(*ir))
 +        {
 +            check_nst("nstlist", ir->nstlist,
 +                      "nstcalcenergy", &ir->nstcalcenergy, wi);
 +            if (ir->epc != epcNO)
 +            {
 +                check_nst("nstlist", ir->nstlist,
 +                          "nstpcouple", &ir->nstpcouple, wi);
 +            }
 +        }
 +
 +        if (ir->nstcalcenergy > 0)
 +        {
 +            if (ir->efep != efepNO)
 +            {
 +                /* nstdhdl should be a multiple of nstcalcenergy */
 +                check_nst("nstcalcenergy", ir->nstcalcenergy,
 +                          "nstdhdl", &ir->fepvals->nstdhdl, wi);
 +                /* nstexpanded should be a multiple of nstcalcenergy */
 +                check_nst("nstcalcenergy", ir->nstcalcenergy,
 +                          "nstexpanded", &ir->expandedvals->nstexpanded, wi);
 +            }
 +            /* for storing exact averages nstenergy should be
 +             * a multiple of nstcalcenergy
 +             */
 +            check_nst("nstcalcenergy", ir->nstcalcenergy,
 +                      "nstenergy", &ir->nstenergy, wi);
 +        }
 +    }
 +
 +    /* LD STUFF */
 +    if ((EI_SD(ir->eI) || ir->eI == eiBD) &&
 +        ir->bContinuation && ir->ld_seed != -1)
 +    {
 +        warning_note(wi, "You are doing a continuation with SD or BD, make sure that ld_seed is different from the previous run (using ld_seed=-1 will ensure this)");
 +    }
 +
 +    /* TPI STUFF */
 +    if (EI_TPI(ir->eI))
 +    {
 +        sprintf(err_buf, "TPI only works with pbc = %s", epbc_names[epbcXYZ]);
 +        CHECK(ir->ePBC != epbcXYZ);
 +        sprintf(err_buf, "TPI only works with ns = %s", ens_names[ensGRID]);
 +        CHECK(ir->ns_type != ensGRID);
 +        sprintf(err_buf, "with TPI nstlist should be larger than zero");
 +        CHECK(ir->nstlist <= 0);
 +        sprintf(err_buf, "TPI does not work with full electrostatics other than PME");
 +        CHECK(EEL_FULL(ir->coulombtype) && !EEL_PME(ir->coulombtype));
 +    }
 +
 +    /* SHAKE / LINCS */
 +    if ( (opts->nshake > 0) && (opts->bMorse) )
 +    {
 +        sprintf(warn_buf,
 +                "Using morse bond-potentials while constraining bonds is useless");
 +        warning(wi, warn_buf);
 +    }
 +
 +    if ((EI_SD(ir->eI) || ir->eI == eiBD) &&
 +        ir->bContinuation && ir->ld_seed != -1)
 +    {
 +        warning_note(wi, "You are doing a continuation with SD or BD, make sure that ld_seed is different from the previous run (using ld_seed=-1 will ensure this)");
 +    }
 +    /* verify simulated tempering options */
 +
 +    if (ir->bSimTemp)
 +    {
 +        gmx_bool bAllTempZero = TRUE;
 +        for (i = 0; i < fep->n_lambda; i++)
 +        {
 +            sprintf(err_buf, "Entry %d for %s must be between 0 and 1, instead is %g", i, efpt_names[efptTEMPERATURE], fep->all_lambda[efptTEMPERATURE][i]);
 +            CHECK((fep->all_lambda[efptTEMPERATURE][i] < 0) || (fep->all_lambda[efptTEMPERATURE][i] > 1));
 +            if (fep->all_lambda[efptTEMPERATURE][i] > 0)
 +            {
 +                bAllTempZero = FALSE;
 +            }
 +        }
 +        sprintf(err_buf, "if simulated tempering is on, temperature-lambdas may not be all zero");
 +        CHECK(bAllTempZero == TRUE);
 +
 +        sprintf(err_buf, "Simulated tempering is currently only compatible with md-vv");
 +        CHECK(ir->eI != eiVV);
 +
 +        /* check compatibility of the temperature coupling with simulated tempering */
 +
 +        if (ir->etc == etcNOSEHOOVER)
 +        {
 +            sprintf(warn_buf, "Nose-Hoover based temperature control such as [%s] my not be entirelyconsistent with simulated tempering", etcoupl_names[ir->etc]);
 +            warning_note(wi, warn_buf);
 +        }
 +
 +        /* check that the temperatures make sense */
 +
 +        sprintf(err_buf, "Higher simulated tempering temperature (%g) must be >= than the simulated tempering lower temperature (%g)", ir->simtempvals->simtemp_high, ir->simtempvals->simtemp_low);
 +        CHECK(ir->simtempvals->simtemp_high <= ir->simtempvals->simtemp_low);
 +
 +        sprintf(err_buf, "Higher simulated tempering temperature (%g) must be >= zero", ir->simtempvals->simtemp_high);
 +        CHECK(ir->simtempvals->simtemp_high <= 0);
 +
 +        sprintf(err_buf, "Lower simulated tempering temperature (%g) must be >= zero", ir->simtempvals->simtemp_low);
 +        CHECK(ir->simtempvals->simtemp_low <= 0);
 +    }
 +
 +    /* verify free energy options */
 +
 +    if (ir->efep != efepNO)
 +    {
 +        fep = ir->fepvals;
 +        sprintf(err_buf, "The soft-core power is %d and can only be 1 or 2",
 +                fep->sc_power);
 +        CHECK(fep->sc_alpha != 0 && fep->sc_power != 1 && fep->sc_power != 2);
 +
 +        sprintf(err_buf, "The soft-core sc-r-power is %d and can only be 6 or 48",
 +                (int)fep->sc_r_power);
 +        CHECK(fep->sc_alpha != 0 && fep->sc_r_power != 6.0 && fep->sc_r_power != 48.0);
 +
-             sprintf(warn_buf, "With coulomb soft core, the reciprocal space calculation will not necessarily cancel.  It may be necessary to decrease the reciprocal space energy, and increase the cutoff radius to get sufficiently close matches to energies with free energy turned off.");
-             warning(wi, warn_buf);
 +        sprintf(err_buf, "Can't use postive delta-lambda (%g) if initial state/lambda does not start at zero", fep->delta_lambda);
 +        CHECK(fep->delta_lambda > 0 && ((fep->init_fep_state > 0) ||  (fep->init_lambda > 0)));
 +
 +        sprintf(err_buf, "Can't use postive delta-lambda (%g) with expanded ensemble simulations", fep->delta_lambda);
 +        CHECK(fep->delta_lambda > 0 && (ir->efep == efepEXPANDED));
 +
 +        sprintf(err_buf, "Free-energy not implemented for Ewald");
 +        CHECK(ir->coulombtype == eelEWALD);
 +
 +        /* check validity of lambda inputs */
 +        if (fep->n_lambda == 0)
 +        {
 +            /* Clear output in case of no states:*/
 +            sprintf(err_buf, "init-lambda-state set to %d: no lambda states are defined.", fep->init_fep_state);
 +            CHECK((fep->init_fep_state >= 0) && (fep->n_lambda == 0));
 +        }
 +        else
 +        {
 +            sprintf(err_buf, "initial thermodynamic state %d does not exist, only goes to %d", fep->init_fep_state, fep->n_lambda-1);
 +            CHECK((fep->init_fep_state >= fep->n_lambda));
 +        }
 +
 +        sprintf(err_buf, "Lambda state must be set, either with init-lambda-state or with init-lambda");
 +        CHECK((fep->init_fep_state < 0) && (fep->init_lambda < 0));
 +
 +        sprintf(err_buf, "init-lambda=%g while init-lambda-state=%d. Lambda state must be set either with init-lambda-state or with init-lambda, but not both",
 +                fep->init_lambda, fep->init_fep_state);
 +        CHECK((fep->init_fep_state >= 0) && (fep->init_lambda >= 0));
 +
 +
 +
 +        if ((fep->init_lambda >= 0) && (fep->delta_lambda == 0))
 +        {
 +            int n_lambda_terms;
 +            n_lambda_terms = 0;
 +            for (i = 0; i < efptNR; i++)
 +            {
 +                if (fep->separate_dvdl[i])
 +                {
 +                    n_lambda_terms++;
 +                }
 +            }
 +            if (n_lambda_terms > 1)
 +            {
 +                sprintf(warn_buf, "If lambda vector states (fep-lambdas, coul-lambdas etc.) are set, don't use init-lambda to set lambda state (except for slow growth). Use init-lambda-state instead.");
 +                warning(wi, warn_buf);
 +            }
 +
 +            if (n_lambda_terms < 2 && fep->n_lambda > 0)
 +            {
 +                warning_note(wi,
 +                             "init-lambda is deprecated for setting lambda state (except for slow growth). Use init-lambda-state instead.");
 +            }
 +        }
 +
 +        for (j = 0; j < efptNR; j++)
 +        {
 +            for (i = 0; i < fep->n_lambda; i++)
 +            {
 +                sprintf(err_buf, "Entry %d for %s must be between 0 and 1, instead is %g", i, efpt_names[j], fep->all_lambda[j][i]);
 +                CHECK((fep->all_lambda[j][i] < 0) || (fep->all_lambda[j][i] > 1));
 +            }
 +        }
 +
 +        if ((fep->sc_alpha > 0) && (!fep->bScCoul))
 +        {
 +            for (i = 0; i < fep->n_lambda; i++)
 +            {
 +                sprintf(err_buf, "For state %d, vdw-lambdas (%f) is changing with vdw softcore, while coul-lambdas (%f) is nonzero without coulomb softcore: this will lead to crashes, and is not supported.", i, fep->all_lambda[efptVDW][i],
 +                        fep->all_lambda[efptCOUL][i]);
 +                CHECK((fep->sc_alpha > 0) &&
 +                      (((fep->all_lambda[efptCOUL][i] > 0.0) &&
 +                        (fep->all_lambda[efptCOUL][i] < 1.0)) &&
 +                       ((fep->all_lambda[efptVDW][i] > 0.0) &&
 +                        (fep->all_lambda[efptVDW][i] < 1.0))));
 +            }
 +        }
 +
 +        if ((fep->bScCoul) && (EEL_PME(ir->coulombtype)))
 +        {
++            real sigma, lambda, r_sc;
++
++            sigma  = 0.34;
++            /* Maximum estimate for A and B charges equal with lambda power 1 */
++            lambda = 0.5;
++            r_sc   = pow(lambda*fep->sc_alpha*pow(sigma/ir->rcoulomb, fep->sc_r_power) + 1.0, 1.0/fep->sc_r_power);
++            sprintf(warn_buf, "With PME there is a minor soft core effect present at the cut-off, proportional to (LJsigma/rcoulomb)^%g. This could have a minor effect on energy conservation, but usually other effects dominate. With a common sigma value of %g nm the fraction of the particle-particle potential at the cut-off at lambda=%g is around %.1e, while ewald-rtol is %.1e.",
++                    fep->sc_r_power,
++                    sigma, lambda, r_sc - 1.0, ir->ewald_rtol);
++            warning_note(wi, warn_buf);
 +        }
 +
 +        /*  Free Energy Checks -- In an ideal world, slow growth and FEP would
 +            be treated differently, but that's the next step */
 +
 +        for (i = 0; i < efptNR; i++)
 +        {
 +            for (j = 0; j < fep->n_lambda; j++)
 +            {
 +                sprintf(err_buf, "%s[%d] must be between 0 and 1", efpt_names[i], j);
 +                CHECK((fep->all_lambda[i][j] < 0) || (fep->all_lambda[i][j] > 1));
 +            }
 +        }
 +    }
 +
 +    if ((ir->bSimTemp) || (ir->efep == efepEXPANDED))
 +    {
 +        fep    = ir->fepvals;
 +        expand = ir->expandedvals;
 +
 +        /* checking equilibration of weights inputs for validity */
 +
 +        sprintf(err_buf, "weight-equil-number-all-lambda (%d) is ignored if lmc-weights-equil is not equal to %s",
 +                expand->equil_n_at_lam, elmceq_names[elmceqNUMATLAM]);
 +        CHECK((expand->equil_n_at_lam > 0) && (expand->elmceq != elmceqNUMATLAM));
 +
 +        sprintf(err_buf, "weight-equil-number-samples (%d) is ignored if lmc-weights-equil is not equal to %s",
 +                expand->equil_samples, elmceq_names[elmceqSAMPLES]);
 +        CHECK((expand->equil_samples > 0) && (expand->elmceq != elmceqSAMPLES));
 +
 +        sprintf(err_buf, "weight-equil-number-steps (%d) is ignored if lmc-weights-equil is not equal to %s",
 +                expand->equil_steps, elmceq_names[elmceqSTEPS]);
 +        CHECK((expand->equil_steps > 0) && (expand->elmceq != elmceqSTEPS));
 +
 +        sprintf(err_buf, "weight-equil-wl-delta (%d) is ignored if lmc-weights-equil is not equal to %s",
 +                expand->equil_samples, elmceq_names[elmceqWLDELTA]);
 +        CHECK((expand->equil_wl_delta > 0) && (expand->elmceq != elmceqWLDELTA));
 +
 +        sprintf(err_buf, "weight-equil-count-ratio (%f) is ignored if lmc-weights-equil is not equal to %s",
 +                expand->equil_ratio, elmceq_names[elmceqRATIO]);
 +        CHECK((expand->equil_ratio > 0) && (expand->elmceq != elmceqRATIO));
 +
 +        sprintf(err_buf, "weight-equil-number-all-lambda (%d) must be a positive integer if lmc-weights-equil=%s",
 +                expand->equil_n_at_lam, elmceq_names[elmceqNUMATLAM]);
 +        CHECK((expand->equil_n_at_lam <= 0) && (expand->elmceq == elmceqNUMATLAM));
 +
 +        sprintf(err_buf, "weight-equil-number-samples (%d) must be a positive integer if lmc-weights-equil=%s",
 +                expand->equil_samples, elmceq_names[elmceqSAMPLES]);
 +        CHECK((expand->equil_samples <= 0) && (expand->elmceq == elmceqSAMPLES));
 +
 +        sprintf(err_buf, "weight-equil-number-steps (%d) must be a positive integer if lmc-weights-equil=%s",
 +                expand->equil_steps, elmceq_names[elmceqSTEPS]);
 +        CHECK((expand->equil_steps <= 0) && (expand->elmceq == elmceqSTEPS));
 +
 +        sprintf(err_buf, "weight-equil-wl-delta (%f) must be > 0 if lmc-weights-equil=%s",
 +                expand->equil_wl_delta, elmceq_names[elmceqWLDELTA]);
 +        CHECK((expand->equil_wl_delta <= 0) && (expand->elmceq == elmceqWLDELTA));
 +
 +        sprintf(err_buf, "weight-equil-count-ratio (%f) must be > 0 if lmc-weights-equil=%s",
 +                expand->equil_ratio, elmceq_names[elmceqRATIO]);
 +        CHECK((expand->equil_ratio <= 0) && (expand->elmceq == elmceqRATIO));
 +
 +        sprintf(err_buf, "lmc-weights-equil=%s only possible when lmc-stats = %s or lmc-stats %s",
 +                elmceq_names[elmceqWLDELTA], elamstats_names[elamstatsWL], elamstats_names[elamstatsWWL]);
 +        CHECK((expand->elmceq == elmceqWLDELTA) && (!EWL(expand->elamstats)));
 +
 +        sprintf(err_buf, "lmc-repeats (%d) must be greater than 0", expand->lmc_repeats);
 +        CHECK((expand->lmc_repeats <= 0));
 +        sprintf(err_buf, "minimum-var-min (%d) must be greater than 0", expand->minvarmin);
 +        CHECK((expand->minvarmin <= 0));
 +        sprintf(err_buf, "weight-c-range (%d) must be greater or equal to 0", expand->c_range);
 +        CHECK((expand->c_range < 0));
 +        sprintf(err_buf, "init-lambda-state (%d) must be zero if lmc-forced-nstart (%d)> 0 and lmc-move != 'no'",
 +                fep->init_fep_state, expand->lmc_forced_nstart);
 +        CHECK((fep->init_fep_state != 0) && (expand->lmc_forced_nstart > 0) && (expand->elmcmove != elmcmoveNO));
 +        sprintf(err_buf, "lmc-forced-nstart (%d) must not be negative", expand->lmc_forced_nstart);
 +        CHECK((expand->lmc_forced_nstart < 0));
 +        sprintf(err_buf, "init-lambda-state (%d) must be in the interval [0,number of lambdas)", fep->init_fep_state);
 +        CHECK((fep->init_fep_state < 0) || (fep->init_fep_state >= fep->n_lambda));
 +
 +        sprintf(err_buf, "init-wl-delta (%f) must be greater than or equal to 0", expand->init_wl_delta);
 +        CHECK((expand->init_wl_delta < 0));
 +        sprintf(err_buf, "wl-ratio (%f) must be between 0 and 1", expand->wl_ratio);
 +        CHECK((expand->wl_ratio <= 0) || (expand->wl_ratio >= 1));
 +        sprintf(err_buf, "wl-scale (%f) must be between 0 and 1", expand->wl_scale);
 +        CHECK((expand->wl_scale <= 0) || (expand->wl_scale >= 1));
 +
 +        /* if there is no temperature control, we need to specify an MC temperature */
 +        sprintf(err_buf, "If there is no temperature control, and lmc-mcmove!= 'no',mc_temperature must be set to a positive number");
 +        if (expand->nstTij > 0)
 +        {
 +            sprintf(err_buf, "nst-transition-matrix (%d) must be an integer multiple of nstlog (%d)",
 +                    expand->nstTij, ir->nstlog);
 +            CHECK((mod(expand->nstTij, ir->nstlog) != 0));
 +        }
 +    }
 +
 +    /* PBC/WALLS */
 +    sprintf(err_buf, "walls only work with pbc=%s", epbc_names[epbcXY]);
 +    CHECK(ir->nwall && ir->ePBC != epbcXY);
 +
 +    /* VACUUM STUFF */
 +    if (ir->ePBC != epbcXYZ && ir->nwall != 2)
 +    {
 +        if (ir->ePBC == epbcNONE)
 +        {
 +            if (ir->epc != epcNO)
 +            {
 +                warning(wi, "Turning off pressure coupling for vacuum system");
 +                ir->epc = epcNO;
 +            }
 +        }
 +        else
 +        {
 +            sprintf(err_buf, "Can not have pressure coupling with pbc=%s",
 +                    epbc_names[ir->ePBC]);
 +            CHECK(ir->epc != epcNO);
 +        }
 +        sprintf(err_buf, "Can not have Ewald with pbc=%s", epbc_names[ir->ePBC]);
 +        CHECK(EEL_FULL(ir->coulombtype));
 +
 +        sprintf(err_buf, "Can not have dispersion correction with pbc=%s",
 +                epbc_names[ir->ePBC]);
 +        CHECK(ir->eDispCorr != edispcNO);
 +    }
 +
 +    if (ir->rlist == 0.0)
 +    {
 +        sprintf(err_buf, "can only have neighborlist cut-off zero (=infinite)\n"
 +                "with coulombtype = %s or coulombtype = %s\n"
 +                "without periodic boundary conditions (pbc = %s) and\n"
 +                "rcoulomb and rvdw set to zero",
 +                eel_names[eelCUT], eel_names[eelUSER], epbc_names[epbcNONE]);
 +        CHECK(((ir->coulombtype != eelCUT) && (ir->coulombtype != eelUSER)) ||
 +              (ir->ePBC     != epbcNONE) ||
 +              (ir->rcoulomb != 0.0)      || (ir->rvdw != 0.0));
 +
 +        if (ir->nstlist < 0)
 +        {
 +            warning_error(wi, "Can not have heuristic neighborlist updates without cut-off");
 +        }
 +        if (ir->nstlist > 0)
 +        {
 +            warning_note(wi, "Simulating without cut-offs is usually (slightly) faster with nstlist=0, nstype=simple and particle decomposition");
 +        }
 +    }
 +
 +    /* COMM STUFF */
 +    if (ir->nstcomm == 0)
 +    {
 +        ir->comm_mode = ecmNO;
 +    }
 +    if (ir->comm_mode != ecmNO)
 +    {
 +        if (ir->nstcomm < 0)
 +        {
 +            warning(wi, "If you want to remove the rotation around the center of mass, you should set comm_mode = Angular instead of setting nstcomm < 0. nstcomm is modified to its absolute value");
 +            ir->nstcomm = abs(ir->nstcomm);
 +        }
 +
 +        if (ir->nstcalcenergy > 0 && ir->nstcomm < ir->nstcalcenergy)
 +        {
 +            warning_note(wi, "nstcomm < nstcalcenergy defeats the purpose of nstcalcenergy, setting nstcomm to nstcalcenergy");
 +            ir->nstcomm = ir->nstcalcenergy;
 +        }
 +
 +        if (ir->comm_mode == ecmANGULAR)
 +        {
 +            sprintf(err_buf, "Can not remove the rotation around the center of mass with periodic molecules");
 +            CHECK(ir->bPeriodicMols);
 +            if (ir->ePBC != epbcNONE)
 +            {
 +                warning(wi, "Removing the rotation around the center of mass in a periodic system (this is not a problem when you have only one molecule).");
 +            }
 +        }
 +    }
 +
 +    if (EI_STATE_VELOCITY(ir->eI) && ir->ePBC == epbcNONE && ir->comm_mode != ecmANGULAR)
 +    {
 +        warning_note(wi, "Tumbling and or flying ice-cubes: We are not removing rotation around center of mass in a non-periodic system. You should probably set comm_mode = ANGULAR.");
 +    }
 +
 +    sprintf(err_buf, "Twin-range neighbour searching (NS) with simple NS"
 +            " algorithm not implemented");
 +    CHECK(((ir->rcoulomb > ir->rlist) || (ir->rvdw > ir->rlist))
 +          && (ir->ns_type == ensSIMPLE));
 +
 +    /* TEMPERATURE COUPLING */
 +    if (ir->etc == etcYES)
 +    {
 +        ir->etc = etcBERENDSEN;
 +        warning_note(wi, "Old option for temperature coupling given: "
 +                     "changing \"yes\" to \"Berendsen\"\n");
 +    }
 +
 +    if ((ir->etc == etcNOSEHOOVER) || (ir->epc == epcMTTK))
 +    {
 +        if (ir->opts.nhchainlength < 1)
 +        {
 +            sprintf(warn_buf, "number of Nose-Hoover chains (currently %d) cannot be less than 1,reset to 1\n", ir->opts.nhchainlength);
 +            ir->opts.nhchainlength = 1;
 +            warning(wi, warn_buf);
 +        }
 +
 +        if (ir->etc == etcNOSEHOOVER && !EI_VV(ir->eI) && ir->opts.nhchainlength > 1)
 +        {
 +            warning_note(wi, "leapfrog does not yet support Nose-Hoover chains, nhchainlength reset to 1");
 +            ir->opts.nhchainlength = 1;
 +        }
 +    }
 +    else
 +    {
 +        ir->opts.nhchainlength = 0;
 +    }
 +
 +    if (ir->eI == eiVVAK)
 +    {
 +        sprintf(err_buf, "%s implemented primarily for validation, and requires nsttcouple = 1 and nstpcouple = 1.",
 +                ei_names[eiVVAK]);
 +        CHECK((ir->nsttcouple != 1) || (ir->nstpcouple != 1));
 +    }
 +
 +    if (ETC_ANDERSEN(ir->etc))
 +    {
 +        sprintf(err_buf, "%s temperature control not supported for integrator %s.", etcoupl_names[ir->etc], ei_names[ir->eI]);
 +        CHECK(!(EI_VV(ir->eI)));
 +
 +        for (i = 0; i < ir->opts.ngtc; i++)
 +        {
 +            sprintf(err_buf, "all tau_t must currently be equal using Andersen temperature control, violated for group %d", i);
 +            CHECK(ir->opts.tau_t[0] != ir->opts.tau_t[i]);
 +            sprintf(err_buf, "all tau_t must be postive using Andersen temperature control, tau_t[%d]=%10.6f",
 +                    i, ir->opts.tau_t[i]);
 +            CHECK(ir->opts.tau_t[i] < 0);
 +        }
 +        if (ir->nstcomm > 0 && (ir->etc == etcANDERSEN))
 +        {
 +            sprintf(warn_buf, "Center of mass removal not necessary for %s.  All velocities of coupled groups are rerandomized periodically, so flying ice cube errors will not occur.", etcoupl_names[ir->etc]);
 +            warning_note(wi, warn_buf);
 +        }
 +
 +        sprintf(err_buf, "nstcomm must be 1, not %d for %s, as velocities of atoms in coupled groups are randomized every time step", ir->nstcomm, etcoupl_names[ir->etc]);
 +        CHECK(ir->nstcomm > 1 && (ir->etc == etcANDERSEN));
 +
 +        for (i = 0; i < ir->opts.ngtc; i++)
 +        {
 +            int nsteps = (int)(ir->opts.tau_t[i]/ir->delta_t);
 +            sprintf(err_buf, "tau_t/delta_t for group %d for temperature control method %s must be a multiple of nstcomm (%d), as velocities of atoms in coupled groups are randomized every time step. The input tau_t (%8.3f) leads to %d steps per randomization", i, etcoupl_names[ir->etc], ir->nstcomm, ir->opts.tau_t[i], nsteps);
 +            CHECK((nsteps % ir->nstcomm) && (ir->etc == etcANDERSENMASSIVE));
 +        }
 +    }
 +    if (ir->etc == etcBERENDSEN)
 +    {
 +        sprintf(warn_buf, "The %s thermostat does not generate the correct kinetic energy distribution. You might want to consider using the %s thermostat.",
 +                ETCOUPLTYPE(ir->etc), ETCOUPLTYPE(etcVRESCALE));
 +        warning_note(wi, warn_buf);
 +    }
 +
 +    if ((ir->etc == etcNOSEHOOVER || ETC_ANDERSEN(ir->etc))
 +        && ir->epc == epcBERENDSEN)
 +    {
 +        sprintf(warn_buf, "Using Berendsen pressure coupling invalidates the "
 +                "true ensemble for the thermostat");
 +        warning(wi, warn_buf);
 +    }
 +
 +    /* PRESSURE COUPLING */
 +    if (ir->epc == epcISOTROPIC)
 +    {
 +        ir->epc = epcBERENDSEN;
 +        warning_note(wi, "Old option for pressure coupling given: "
 +                     "changing \"Isotropic\" to \"Berendsen\"\n");
 +    }
 +
 +    if (ir->epc != epcNO)
 +    {
 +        dt_pcoupl = ir->nstpcouple*ir->delta_t;
 +
 +        sprintf(err_buf, "tau-p must be > 0 instead of %g\n", ir->tau_p);
 +        CHECK(ir->tau_p <= 0);
 +
 +        if (ir->tau_p/dt_pcoupl < pcouple_min_integration_steps(ir->epc))
 +        {
 +            sprintf(warn_buf, "For proper integration of the %s barostat, tau-p (%g) should be at least %d times larger than nstpcouple*dt (%g)",
 +                    EPCOUPLTYPE(ir->epc), ir->tau_p, pcouple_min_integration_steps(ir->epc), dt_pcoupl);
 +            warning(wi, warn_buf);
 +        }
 +
 +        sprintf(err_buf, "compressibility must be > 0 when using pressure"
 +                " coupling %s\n", EPCOUPLTYPE(ir->epc));
 +        CHECK(ir->compress[XX][XX] < 0 || ir->compress[YY][YY] < 0 ||
 +              ir->compress[ZZ][ZZ] < 0 ||
 +              (trace(ir->compress) == 0 && ir->compress[YY][XX] <= 0 &&
 +               ir->compress[ZZ][XX] <= 0 && ir->compress[ZZ][YY] <= 0));
 +
 +        if (epcPARRINELLORAHMAN == ir->epc && opts->bGenVel)
 +        {
 +            sprintf(warn_buf,
 +                    "You are generating velocities so I am assuming you "
 +                    "are equilibrating a system. You are using "
 +                    "%s pressure coupling, but this can be "
 +                    "unstable for equilibration. If your system crashes, try "
 +                    "equilibrating first with Berendsen pressure coupling. If "
 +                    "you are not equilibrating the system, you can probably "
 +                    "ignore this warning.",
 +                    epcoupl_names[ir->epc]);
 +            warning(wi, warn_buf);
 +        }
 +    }
 +
 +    if (EI_VV(ir->eI))
 +    {
 +        if (ir->epc > epcNO)
 +        {
 +            if ((ir->epc != epcBERENDSEN) && (ir->epc != epcMTTK))
 +            {
 +                warning_error(wi, "for md-vv and md-vv-avek, can only use Berendsen and Martyna-Tuckerman-Tobias-Klein (MTTK) equations for pressure control; MTTK is equivalent to Parrinello-Rahman.");
 +            }
 +        }
 +    }
 +
 +    /* ELECTROSTATICS */
 +    /* More checks are in triple check (grompp.c) */
 +
 +    if (ir->coulombtype == eelSWITCH)
 +    {
 +        sprintf(warn_buf, "coulombtype = %s is only for testing purposes and can lead to serious "
 +                "artifacts, advice: use coulombtype = %s",
 +                eel_names[ir->coulombtype],
 +                eel_names[eelRF_ZERO]);
 +        warning(wi, warn_buf);
 +    }
 +
 +    if (ir->epsilon_r != 1 && ir->implicit_solvent == eisGBSA)
 +    {
 +        sprintf(warn_buf, "epsilon-r = %g with GB implicit solvent, will use this value for inner dielectric", ir->epsilon_r);
 +        warning_note(wi, warn_buf);
 +    }
 +
 +    if (EEL_RF(ir->coulombtype) && ir->epsilon_rf == 1 && ir->epsilon_r != 1)
 +    {
 +        sprintf(warn_buf, "epsilon-r = %g and epsilon-rf = 1 with reaction field, proceeding assuming old format and exchanging epsilon-r and epsilon-rf", ir->epsilon_r);
 +        warning(wi, warn_buf);
 +        ir->epsilon_rf = ir->epsilon_r;
 +        ir->epsilon_r  = 1.0;
 +    }
 +
 +    if (getenv("GALACTIC_DYNAMICS") == NULL)
 +    {
 +        sprintf(err_buf, "epsilon-r must be >= 0 instead of %g\n", ir->epsilon_r);
 +        CHECK(ir->epsilon_r < 0);
 +    }
 +
 +    if (EEL_RF(ir->coulombtype))
 +    {
 +        /* reaction field (at the cut-off) */
 +
 +        if (ir->coulombtype == eelRF_ZERO)
 +        {
 +            sprintf(warn_buf, "With coulombtype = %s, epsilon-rf must be 0, assuming you meant epsilon_rf=0",
 +                    eel_names[ir->coulombtype]);
 +            CHECK(ir->epsilon_rf != 0);
 +            ir->epsilon_rf = 0.0;
 +        }
 +
 +        sprintf(err_buf, "epsilon-rf must be >= epsilon-r");
 +        CHECK((ir->epsilon_rf < ir->epsilon_r && ir->epsilon_rf != 0) ||
 +              (ir->epsilon_r == 0));
 +        if (ir->epsilon_rf == ir->epsilon_r)
 +        {
 +            sprintf(warn_buf, "Using epsilon-rf = epsilon-r with %s does not make sense",
 +                    eel_names[ir->coulombtype]);
 +            warning(wi, warn_buf);
 +        }
 +    }
 +    /* Allow rlist>rcoulomb for tabulated long range stuff. This just
 +     * means the interaction is zero outside rcoulomb, but it helps to
 +     * provide accurate energy conservation.
 +     */
 +    if (EEL_MIGHT_BE_ZERO_AT_CUTOFF(ir->coulombtype))
 +    {
 +        if (EEL_SWITCHED(ir->coulombtype))
 +        {
 +            sprintf(err_buf,
 +                    "With coulombtype = %s rcoulomb_switch must be < rcoulomb. Or, better: Use the potential modifier options!",
 +                    eel_names[ir->coulombtype]);
 +            CHECK(ir->rcoulomb_switch >= ir->rcoulomb);
 +        }
 +    }
 +    else if (ir->coulombtype == eelCUT || EEL_RF(ir->coulombtype))
 +    {
 +        if (ir->cutoff_scheme == ecutsGROUP && ir->coulomb_modifier == eintmodNONE)
 +        {
 +            sprintf(err_buf, "With coulombtype = %s, rcoulomb should be >= rlist unless you use a potential modifier",
 +                    eel_names[ir->coulombtype]);
 +            CHECK(ir->rlist > ir->rcoulomb);
 +        }
 +    }
 +
 +    if (ir->coulombtype == eelSWITCH || ir->coulombtype == eelSHIFT ||
 +        ir->vdwtype == evdwSWITCH || ir->vdwtype == evdwSHIFT)
 +    {
 +        sprintf(warn_buf,
 +                "The switch/shift interaction settings are just for compatibility; you will get better "
 +                "performance from applying potential modifiers to your interactions!\n");
 +        warning_note(wi, warn_buf);
 +    }
 +
 +    if (EEL_FULL(ir->coulombtype))
 +    {
 +        if (ir->coulombtype == eelPMESWITCH || ir->coulombtype == eelPMEUSER ||
 +            ir->coulombtype == eelPMEUSERSWITCH)
 +        {
 +            sprintf(err_buf, "With coulombtype = %s, rcoulomb must be <= rlist",
 +                    eel_names[ir->coulombtype]);
 +            CHECK(ir->rcoulomb > ir->rlist);
 +        }
 +        else if (ir->cutoff_scheme == ecutsGROUP && ir->coulomb_modifier == eintmodNONE)
 +        {
 +            if (ir->coulombtype == eelPME || ir->coulombtype == eelP3M_AD)
 +            {
 +                sprintf(err_buf,
 +                        "With coulombtype = %s (without modifier), rcoulomb must be equal to rlist,\n"
 +                        "or rlistlong if nstcalclr=1. For optimal energy conservation,consider using\n"
 +                        "a potential modifier.", eel_names[ir->coulombtype]);
 +                if (ir->nstcalclr == 1)
 +                {
 +                    CHECK(ir->rcoulomb != ir->rlist && ir->rcoulomb != ir->rlistlong);
 +                }
 +                else
 +                {
 +                    CHECK(ir->rcoulomb != ir->rlist);
 +                }
 +            }
 +        }
 +    }
 +
 +    if (EEL_PME(ir->coulombtype))
 +    {
 +        if (ir->pme_order < 3)
 +        {
 +            warning_error(wi, "pme-order can not be smaller than 3");
 +        }
 +    }
 +
 +    if (ir->nwall == 2 && EEL_FULL(ir->coulombtype))
 +    {
 +        if (ir->ewald_geometry == eewg3D)
 +        {
 +            sprintf(warn_buf, "With pbc=%s you should use ewald-geometry=%s",
 +                    epbc_names[ir->ePBC], eewg_names[eewg3DC]);
 +            warning(wi, warn_buf);
 +        }
 +        /* This check avoids extra pbc coding for exclusion corrections */
 +        sprintf(err_buf, "wall-ewald-zfac should be >= 2");
 +        CHECK(ir->wall_ewald_zfac < 2);
 +    }
 +
 +    if (EVDW_SWITCHED(ir->vdwtype))
 +    {
 +        sprintf(err_buf, "With vdwtype = %s rvdw-switch must be < rvdw. Or, better - use a potential modifier.",
 +                evdw_names[ir->vdwtype]);
 +        CHECK(ir->rvdw_switch >= ir->rvdw);
 +    }
 +    else if (ir->vdwtype == evdwCUT)
 +    {
 +        if (ir->cutoff_scheme == ecutsGROUP && ir->vdw_modifier == eintmodNONE)
 +        {
 +            sprintf(err_buf, "With vdwtype = %s, rvdw must be >= rlist unless you use a potential modifier", evdw_names[ir->vdwtype]);
 +            CHECK(ir->rlist > ir->rvdw);
 +        }
 +    }
 +    if (ir->cutoff_scheme == ecutsGROUP)
 +    {
 +        if (EEL_IS_ZERO_AT_CUTOFF(ir->coulombtype)
 +            && (ir->rlistlong <= ir->rcoulomb))
 +        {
 +            sprintf(warn_buf, "For energy conservation with switch/shift potentials, %s should be 0.1 to 0.3 nm larger than rcoulomb.",
 +                    IR_TWINRANGE(*ir) ? "rlistlong" : "rlist");
 +            warning_note(wi, warn_buf);
 +        }
 +        if (EVDW_SWITCHED(ir->vdwtype) && (ir->rlistlong <= ir->rvdw))
 +        {
 +            sprintf(warn_buf, "For energy conservation with switch/shift potentials, %s should be 0.1 to 0.3 nm larger than rvdw.",
 +                    IR_TWINRANGE(*ir) ? "rlistlong" : "rlist");
 +            warning_note(wi, warn_buf);
 +        }
 +    }
 +
 +    if (ir->vdwtype == evdwUSER && ir->eDispCorr != edispcNO)
 +    {
 +        warning_note(wi, "You have selected user tables with dispersion correction, the dispersion will be corrected to -C6/r^6 beyond rvdw_switch (the tabulated interaction between rvdw_switch and rvdw will not be double counted). Make sure that you really want dispersion correction to -C6/r^6.");
 +    }
 +
 +    if (ir->nstlist == -1)
 +    {
 +        sprintf(err_buf, "With nstlist=-1 rvdw and rcoulomb should be smaller than rlist to account for diffusion and possibly charge-group radii");
 +        CHECK(ir->rvdw >= ir->rlist || ir->rcoulomb >= ir->rlist);
 +    }
 +    sprintf(err_buf, "nstlist can not be smaller than -1");
 +    CHECK(ir->nstlist < -1);
 +
 +    if (ir->eI == eiLBFGS && (ir->coulombtype == eelCUT || ir->vdwtype == evdwCUT)
 +        && ir->rvdw != 0)
 +    {
 +        warning(wi, "For efficient BFGS minimization, use switch/shift/pme instead of cut-off.");
 +    }
 +
 +    if (ir->eI == eiLBFGS && ir->nbfgscorr <= 0)
 +    {
 +        warning(wi, "Using L-BFGS with nbfgscorr<=0 just gets you steepest descent.");
 +    }
 +
 +    /* ENERGY CONSERVATION */
 +    if (ir_NVE(ir) && ir->cutoff_scheme == ecutsGROUP)
 +    {
 +        if (!EVDW_MIGHT_BE_ZERO_AT_CUTOFF(ir->vdwtype) && ir->rvdw > 0 && ir->vdw_modifier == eintmodNONE)
 +        {
 +            sprintf(warn_buf, "You are using a cut-off for VdW interactions with NVE, for good energy conservation use vdwtype = %s (possibly with DispCorr)",
 +                    evdw_names[evdwSHIFT]);
 +            warning_note(wi, warn_buf);
 +        }
 +        if (!EEL_MIGHT_BE_ZERO_AT_CUTOFF(ir->coulombtype) && ir->rcoulomb > 0 && ir->coulomb_modifier == eintmodNONE)
 +        {
 +            sprintf(warn_buf, "You are using a cut-off for electrostatics with NVE, for good energy conservation use coulombtype = %s or %s",
 +                    eel_names[eelPMESWITCH], eel_names[eelRF_ZERO]);
 +            warning_note(wi, warn_buf);
 +        }
 +    }
 +
 +    /* IMPLICIT SOLVENT */
 +    if (ir->coulombtype == eelGB_NOTUSED)
 +    {
 +        ir->coulombtype      = eelCUT;
 +        ir->implicit_solvent = eisGBSA;
 +        fprintf(stderr, "Note: Old option for generalized born electrostatics given:\n"
 +                "Changing coulombtype from \"generalized-born\" to \"cut-off\" and instead\n"
 +                "setting implicit-solvent value to \"GBSA\" in input section.\n");
 +    }
 +
 +    if (ir->sa_algorithm == esaSTILL)
 +    {
 +        sprintf(err_buf, "Still SA algorithm not available yet, use %s or %s instead\n", esa_names[esaAPPROX], esa_names[esaNO]);
 +        CHECK(ir->sa_algorithm == esaSTILL);
 +    }
 +
 +    if (ir->implicit_solvent == eisGBSA)
 +    {
 +        sprintf(err_buf, "With GBSA implicit solvent, rgbradii must be equal to rlist.");
 +        CHECK(ir->rgbradii != ir->rlist);
 +
 +        if (ir->coulombtype != eelCUT)
 +        {
 +            sprintf(err_buf, "With GBSA, coulombtype must be equal to %s\n", eel_names[eelCUT]);
 +            CHECK(ir->coulombtype != eelCUT);
 +        }
 +        if (ir->vdwtype != evdwCUT)
 +        {
 +            sprintf(err_buf, "With GBSA, vdw-type must be equal to %s\n", evdw_names[evdwCUT]);
 +            CHECK(ir->vdwtype != evdwCUT);
 +        }
 +        if (ir->nstgbradii < 1)
 +        {
 +            sprintf(warn_buf, "Using GBSA with nstgbradii<1, setting nstgbradii=1");
 +            warning_note(wi, warn_buf);
 +            ir->nstgbradii = 1;
 +        }
 +        if (ir->sa_algorithm == esaNO)
 +        {
 +            sprintf(warn_buf, "No SA (non-polar) calculation requested together with GB. Are you sure this is what you want?\n");
 +            warning_note(wi, warn_buf);
 +        }
 +        if (ir->sa_surface_tension < 0 && ir->sa_algorithm != esaNO)
 +        {
 +            sprintf(warn_buf, "Value of sa_surface_tension is < 0. Changing it to 2.05016 or 2.25936 kJ/nm^2/mol for Still and HCT/OBC respectively\n");
 +            warning_note(wi, warn_buf);
 +
 +            if (ir->gb_algorithm == egbSTILL)
 +            {
 +                ir->sa_surface_tension = 0.0049 * CAL2JOULE * 100;
 +            }
 +            else
 +            {
 +                ir->sa_surface_tension = 0.0054 * CAL2JOULE * 100;
 +            }
 +        }
 +        if (ir->sa_surface_tension == 0 && ir->sa_algorithm != esaNO)
 +        {
 +            sprintf(err_buf, "Surface tension set to 0 while SA-calculation requested\n");
 +            CHECK(ir->sa_surface_tension == 0 && ir->sa_algorithm != esaNO);
 +        }
 +
 +    }
 +
 +    if (ir->bAdress)
 +    {
 +        if (ir->cutoff_scheme != ecutsGROUP)
 +        {
 +            warning_error(wi, "AdresS simulation supports only cutoff-scheme=group");
 +        }
 +        if (!EI_SD(ir->eI))
 +        {
 +            warning_error(wi, "AdresS simulation supports only stochastic dynamics");
 +        }
 +        if (ir->epc != epcNO)
 +        {
 +            warning_error(wi, "AdresS simulation does not support pressure coupling");
 +        }
 +        if (EEL_FULL(ir->coulombtype))
 +        {
 +            warning_error(wi, "AdresS simulation does not support long-range electrostatics");
 +        }
 +    }
 +}
 +
 +/* count the number of text elemets separated by whitespace in a string.
 +    str = the input string
 +    maxptr = the maximum number of allowed elements
 +    ptr = the output array of pointers to the first character of each element
 +    returns: the number of elements. */
 +int str_nelem(const char *str, int maxptr, char *ptr[])
 +{
 +    int   np = 0;
 +    char *copy0, *copy;
 +
 +    copy0 = strdup(str);
 +    copy  = copy0;
 +    ltrim(copy);
 +    while (*copy != '\0')
 +    {
 +        if (np >= maxptr)
 +        {
 +            gmx_fatal(FARGS, "Too many groups on line: '%s' (max is %d)",
 +                      str, maxptr);
 +        }
 +        if (ptr)
 +        {
 +            ptr[np] = copy;
 +        }
 +        np++;
 +        while ((*copy != '\0') && !isspace(*copy))
 +        {
 +            copy++;
 +        }
 +        if (*copy != '\0')
 +        {
 +            *copy = '\0';
 +            copy++;
 +        }
 +        ltrim(copy);
 +    }
 +    if (ptr == NULL)
 +    {
 +        sfree(copy0);
 +    }
 +
 +    return np;
 +}
 +
 +/* interpret a number of doubles from a string and put them in an array,
 +   after allocating space for them.
 +   str = the input string
 +   n = the (pre-allocated) number of doubles read
 +   r = the output array of doubles. */
 +static void parse_n_real(char *str, int *n, real **r)
 +{
 +    char *ptr[MAXPTR];
 +    int   i;
 +
 +    *n = str_nelem(str, MAXPTR, ptr);
 +
 +    snew(*r, *n);
 +    for (i = 0; i < *n; i++)
 +    {
 +        (*r)[i] = strtod(ptr[i], NULL);
 +    }
 +}
 +
 +static void do_fep_params(t_inputrec *ir, char fep_lambda[][STRLEN], char weights[STRLEN])
 +{
 +
 +    int         i, j, max_n_lambda, nweights, nfep[efptNR];
 +    t_lambda   *fep    = ir->fepvals;
 +    t_expanded *expand = ir->expandedvals;
 +    real      **count_fep_lambdas;
 +    gmx_bool    bOneLambda = TRUE;
 +
 +    snew(count_fep_lambdas, efptNR);
 +
 +    /* FEP input processing */
 +    /* first, identify the number of lambda values for each type.
 +       All that are nonzero must have the same number */
 +
 +    for (i = 0; i < efptNR; i++)
 +    {
 +        parse_n_real(fep_lambda[i], &(nfep[i]), &(count_fep_lambdas[i]));
 +    }
 +
 +    /* now, determine the number of components.  All must be either zero, or equal. */
 +
 +    max_n_lambda = 0;
 +    for (i = 0; i < efptNR; i++)
 +    {
 +        if (nfep[i] > max_n_lambda)
 +        {
 +            max_n_lambda = nfep[i];  /* here's a nonzero one.  All of them
 +                                        must have the same number if its not zero.*/
 +            break;
 +        }
 +    }
 +
 +    for (i = 0; i < efptNR; i++)
 +    {
 +        if (nfep[i] == 0)
 +        {
 +            ir->fepvals->separate_dvdl[i] = FALSE;
 +        }
 +        else if (nfep[i] == max_n_lambda)
 +        {
 +            if (i != efptTEMPERATURE)  /* we treat this differently -- not really a reason to compute the derivative with
 +                                          respect to the temperature currently */
 +            {
 +                ir->fepvals->separate_dvdl[i] = TRUE;
 +            }
 +        }
 +        else
 +        {
 +            gmx_fatal(FARGS, "Number of lambdas (%d) for FEP type %s not equal to number of other types (%d)",
 +                      nfep[i], efpt_names[i], max_n_lambda);
 +        }
 +    }
 +    /* we don't print out dhdl if the temperature is changing, since we can't correctly define dhdl in this case */
 +    ir->fepvals->separate_dvdl[efptTEMPERATURE] = FALSE;
 +
 +    /* the number of lambdas is the number we've read in, which is either zero
 +       or the same for all */
 +    fep->n_lambda = max_n_lambda;
 +
 +    /* allocate space for the array of lambda values */
 +    snew(fep->all_lambda, efptNR);
 +    /* if init_lambda is defined, we need to set lambda */
 +    if ((fep->init_lambda > 0) && (fep->n_lambda == 0))
 +    {
 +        ir->fepvals->separate_dvdl[efptFEP] = TRUE;
 +    }
 +    /* otherwise allocate the space for all of the lambdas, and transfer the data */
 +    for (i = 0; i < efptNR; i++)
 +    {
 +        snew(fep->all_lambda[i], fep->n_lambda);
 +        if (nfep[i] > 0)  /* if it's zero, then the count_fep_lambda arrays
 +                             are zero */
 +        {
 +            for (j = 0; j < fep->n_lambda; j++)
 +            {
 +                fep->all_lambda[i][j] = (double)count_fep_lambdas[i][j];
 +            }
 +            sfree(count_fep_lambdas[i]);
 +        }
 +    }
 +    sfree(count_fep_lambdas);
 +
 +    /* "fep-vals" is either zero or the full number. If zero, we'll need to define fep-lambdas for internal
 +       bookkeeping -- for now, init_lambda */
 +
 +    if ((nfep[efptFEP] == 0) && (fep->init_lambda >= 0))
 +    {
 +        for (i = 0; i < fep->n_lambda; i++)
 +        {
 +            fep->all_lambda[efptFEP][i] = fep->init_lambda;
 +        }
 +    }
 +
 +    /* check to see if only a single component lambda is defined, and soft core is defined.
 +       In this case, turn on coulomb soft core */
 +
 +    if (max_n_lambda == 0)
 +    {
 +        bOneLambda = TRUE;
 +    }
 +    else
 +    {
 +        for (i = 0; i < efptNR; i++)
 +        {
 +            if ((nfep[i] != 0) && (i != efptFEP))
 +            {
 +                bOneLambda = FALSE;
 +            }
 +        }
 +    }
 +    if ((bOneLambda) && (fep->sc_alpha > 0))
 +    {
 +        fep->bScCoul = TRUE;
 +    }
 +
 +    /* Fill in the others with the efptFEP if they are not explicitly
 +       specified (i.e. nfep[i] == 0).  This means if fep is not defined,
 +       they are all zero. */
 +
 +    for (i = 0; i < efptNR; i++)
 +    {
 +        if ((nfep[i] == 0) && (i != efptFEP))
 +        {
 +            for (j = 0; j < fep->n_lambda; j++)
 +            {
 +                fep->all_lambda[i][j] = fep->all_lambda[efptFEP][j];
 +            }
 +        }
 +    }
 +
 +
 +    /* make it easier if sc_r_power = 48 by increasing it to the 4th power, to be in the right scale. */
 +    if (fep->sc_r_power == 48)
 +    {
 +        if (fep->sc_alpha > 0.1)
 +        {
 +            gmx_fatal(FARGS, "sc_alpha (%f) for sc_r_power = 48 should usually be between 0.001 and 0.004", fep->sc_alpha);
 +        }
 +    }
 +
 +    expand = ir->expandedvals;
 +    /* now read in the weights */
 +    parse_n_real(weights, &nweights, &(expand->init_lambda_weights));
 +    if (nweights == 0)
 +    {
 +        expand->bInit_weights = FALSE;
 +        snew(expand->init_lambda_weights, fep->n_lambda); /* initialize to zero */
 +    }
 +    else if (nweights != fep->n_lambda)
 +    {
 +        gmx_fatal(FARGS, "Number of weights (%d) is not equal to number of lambda values (%d)",
 +                  nweights, fep->n_lambda);
 +    }
 +    else
 +    {
 +        expand->bInit_weights = TRUE;
 +    }
 +    if ((expand->nstexpanded < 0) && (ir->efep != efepNO))
 +    {
 +        expand->nstexpanded = fep->nstdhdl;
 +        /* if you don't specify nstexpanded when doing expanded ensemble free energy calcs, it is set to nstdhdl */
 +    }
 +    if ((expand->nstexpanded < 0) && ir->bSimTemp)
 +    {
 +        expand->nstexpanded = 2*(int)(ir->opts.tau_t[0]/ir->delta_t);
 +        /* if you don't specify nstexpanded when doing expanded ensemble simulated tempering, it is set to
 +           2*tau_t just to be careful so it's not to frequent  */
 +    }
 +}
 +
 +
 +static void do_simtemp_params(t_inputrec *ir)
 +{
 +
 +    snew(ir->simtempvals->temperatures, ir->fepvals->n_lambda);
 +    GetSimTemps(ir->fepvals->n_lambda, ir->simtempvals, ir->fepvals->all_lambda[efptTEMPERATURE]);
 +
 +    return;
 +}
 +
 +static void do_wall_params(t_inputrec *ir,
 +                           char *wall_atomtype, char *wall_density,
 +                           t_gromppopts *opts)
 +{
 +    int    nstr, i;
 +    char  *names[MAXPTR];
 +    double dbl;
 +
 +    opts->wall_atomtype[0] = NULL;
 +    opts->wall_atomtype[1] = NULL;
 +
 +    ir->wall_atomtype[0] = -1;
 +    ir->wall_atomtype[1] = -1;
 +    ir->wall_density[0]  = 0;
 +    ir->wall_density[1]  = 0;
 +
 +    if (ir->nwall > 0)
 +    {
 +        nstr = str_nelem(wall_atomtype, MAXPTR, names);
 +        if (nstr != ir->nwall)
 +        {
 +            gmx_fatal(FARGS, "Expected %d elements for wall_atomtype, found %d",
 +                      ir->nwall, nstr);
 +        }
 +        for (i = 0; i < ir->nwall; i++)
 +        {
 +            opts->wall_atomtype[i] = strdup(names[i]);
 +        }
 +
 +        if (ir->wall_type == ewt93 || ir->wall_type == ewt104)
 +        {
 +            nstr = str_nelem(wall_density, MAXPTR, names);
 +            if (nstr != ir->nwall)
 +            {
 +                gmx_fatal(FARGS, "Expected %d elements for wall-density, found %d", ir->nwall, nstr);
 +            }
 +            for (i = 0; i < ir->nwall; i++)
 +            {
 +                sscanf(names[i], "%lf", &dbl);
 +                if (dbl <= 0)
 +                {
 +                    gmx_fatal(FARGS, "wall-density[%d] = %f\n", i, dbl);
 +                }
 +                ir->wall_density[i] = dbl;
 +            }
 +        }
 +    }
 +}
 +
 +static void add_wall_energrps(gmx_groups_t *groups, int nwall, t_symtab *symtab)
 +{
 +    int     i;
 +    t_grps *grps;
 +    char    str[STRLEN];
 +
 +    if (nwall > 0)
 +    {
 +        srenew(groups->grpname, groups->ngrpname+nwall);
 +        grps = &(groups->grps[egcENER]);
 +        srenew(grps->nm_ind, grps->nr+nwall);
 +        for (i = 0; i < nwall; i++)
 +        {
 +            sprintf(str, "wall%d", i);
 +            groups->grpname[groups->ngrpname] = put_symtab(symtab, str);
 +            grps->nm_ind[grps->nr++]          = groups->ngrpname++;
 +        }
 +    }
 +}
 +/* Read the expanded-ensemble options from the input entries into *expand.
 + *
 + * ninp_p, inp_p: in/out count and array of input entries; they are copied to
 + *                the locals ninp and inp on entry and written back on exit.
 + * expand:        receives the value of every option listed in the body.
 + * wi:            not referenced directly in this body; presumably used by the
 + *                *TYPE macros for warnings -- TODO confirm.
 + *
 + * NOTE(review): the CCTYPE/ITYPE/RTYPE/EETYPE macros are defined outside this
 + * function and appear to rely on the local names ninp and inp (and possibly
 + * nerror), so these locals should not be renamed or removed without checking
 + * the macro definitions.
 + */
 +void read_expandedparams(int *ninp_p, t_inpfile **inp_p,
 +                         t_expanded *expand, warninp_t wi)
 +{
 +    int        ninp, nerror = 0;
 +    t_inpfile *inp;
 +
 +    ninp   = *ninp_p;
 +    inp    = *inp_p;
 +
 +    /* read expanded ensemble parameters */
 +    CCTYPE ("expanded ensemble variables");
 +    ITYPE ("nstexpanded", expand->nstexpanded, -1);
 +    EETYPE("lmc-stats", expand->elamstats, elamstats_names);
 +    EETYPE("lmc-move", expand->elmcmove, elmcmove_names);
 +    EETYPE("lmc-weights-equil", expand->elmceq, elmceq_names);
 +    ITYPE ("weight-equil-number-all-lambda", expand->equil_n_at_lam, -1);
 +    ITYPE ("weight-equil-number-samples", expand->equil_samples, -1);
 +    ITYPE ("weight-equil-number-steps", expand->equil_steps, -1);
 +    RTYPE ("weight-equil-wl-delta", expand->equil_wl_delta, -1);
 +    RTYPE ("weight-equil-count-ratio", expand->equil_ratio, -1);
 +    CCTYPE("Seed for Monte Carlo in lambda space");
 +    ITYPE ("lmc-seed", expand->lmc_seed, -1);
 +    RTYPE ("mc-temperature", expand->mc_temp, -1);
 +    ITYPE ("lmc-repeats", expand->lmc_repeats, 1);
 +    ITYPE ("lmc-gibbsdelta", expand->gibbsdeltalam, -1);
 +    ITYPE ("lmc-forced-nstart", expand->lmc_forced_nstart, 0);
 +    EETYPE("symmetrized-transition-matrix", expand->bSymmetrizedTMatrix, yesno_names);
 +    ITYPE("nst-transition-matrix", expand->nstTij, -1);
 +    ITYPE ("mininum-var-min", expand->minvarmin, 100); /* default is reasonable; NOTE(review): the option name is spelled "mininum" */
 +    ITYPE ("weight-c-range", expand->c_range, 0);      /* default is just C=0 */
 +    RTYPE ("wl-scale", expand->wl_scale, 0.8);
 +    RTYPE ("wl-ratio", expand->wl_ratio, 0.8);
 +    RTYPE ("init-wl-delta", expand->init_wl_delta, 1.0);
 +    EETYPE("wl-oneovert", expand->bWLoneovert, yesno_names);
 +
 +    /* hand the (possibly updated) entry count and array back to the caller */
 +    *ninp_p   = ninp;
 +    *inp_p    = inp;
 +
 +    return;
 +}
 +/* Read all run parameters from the input file mdparin into ir and opts, write
 + * the resulting option list to mdparout, and post-process the options that
 + * were read as raw strings (pressure coupling, coupling molecule type, free
 + * energy, walls, orientation restraints, deform).
 + *
 + * mdparin:  path handed to read_inpfile().
 + * mdparout: path handed to write_inpfile().
 + * ir:       receives the run parameters; ir->fepvals, ir->expandedvals and
 + *           ir->simtempvals are dereferenced, so they must already point to
 + *           valid storage.
 + * opts:     receives the t_gromppopts settings (include, define, gen-vel,
 + *           couple-moltype, wall atom types, ...).
 + * wi:       warning collector passed to warning() and warning_error().
 + *
 + * NOTE(review): the CTYPE/CCTYPE/STYPE/ITYPE/RTYPE/EETYPE/STEPTYPE/REM_TYPE/
 + * REPL_TYPE macros are defined outside this function and appear to operate on
 + * the locals inp and ninp; the order of the invocations presumably determines
 + * the layout of mdparout -- TODO confirm before reordering anything.
 + * NOTE(review): identifiers such as vcm, tcgrps, tau_t, wall_atomtype, deform
 + * or couple_moltype are not declared in this function; presumably file-scope
 + * string buffers that STYPE fills.
 + */
 +void get_ir(const char *mdparin, const char *mdparout,
 +            t_inputrec *ir, t_gromppopts *opts,
 +            warninp_t wi)
 +{
 +    char       *dumstr[2];      /* [0]: "compressibility" text, [1]: "ref-p" text */
 +    double      dumdub[2][6];   /* numbers parsed from dumstr[m]; row 0 is reused for deform */
 +    t_inpfile  *inp;
 +    const char *tmp;
 +    int         i, j, m, ninp;
 +    char        warn_buf[STRLEN];
 +    t_lambda   *fep    = ir->fepvals;
 +    t_expanded *expand = ir->expandedvals;
 +
 +    inp = read_inpfile(mdparin, &ninp, NULL, wi);
 +
 +    snew(dumstr[0], STRLEN);
 +    snew(dumstr[1], STRLEN);
 +
 +    /* remove the following deprecated commands */
 +    REM_TYPE("title");
 +    REM_TYPE("cpp");
 +    REM_TYPE("domain-decomposition");
 +    REM_TYPE("andersen-seed");
 +    REM_TYPE("dihre");
 +    REM_TYPE("dihre-fc");
 +    REM_TYPE("dihre-tau");
 +    REM_TYPE("nstdihreout");
 +    REM_TYPE("nstcheckpoint");
 +
 +    /* replace the following commands with the clearer new versions*/
 +    REPL_TYPE("unconstrained-start", "continuation");
 +    REPL_TYPE("foreign-lambda", "fep-lambdas");
 +
 +    CCTYPE ("VARIOUS PREPROCESSING OPTIONS");
 +    CTYPE ("Preprocessor information: use cpp syntax.");
 +    CTYPE ("e.g.: -I/home/joe/doe -I/home/mary/roe");
 +    STYPE ("include", opts->include,  NULL);
 +    CTYPE ("e.g.: -DPOSRES -DFLEXIBLE (note these variable names are case sensitive)");
 +    STYPE ("define",  opts->define,   NULL);
 +
 +    CCTYPE ("RUN CONTROL PARAMETERS");
 +    EETYPE("integrator",  ir->eI,         ei_names);
 +    CTYPE ("Start time and timestep in ps");
 +    RTYPE ("tinit",   ir->init_t, 0.0);
 +    RTYPE ("dt",      ir->delta_t,    0.001);
 +    STEPTYPE ("nsteps",   ir->nsteps,     0);
 +    CTYPE ("For exact run continuation or redoing part of a run");
 +    STEPTYPE ("init-step", ir->init_step,  0);
 +    CTYPE ("Part index is updated automatically on checkpointing (keeps files separate)");
 +    ITYPE ("simulation-part", ir->simulation_part, 1);
 +    CTYPE ("mode for center of mass motion removal");
 +    EETYPE("comm-mode",   ir->comm_mode,  ecm_names);
 +    CTYPE ("number of steps for center of mass motion removal");
 +    ITYPE ("nstcomm", ir->nstcomm,    100);
 +    CTYPE ("group(s) for center of mass motion removal");
 +    STYPE ("comm-grps",   vcm,            NULL);
 +
 +    CCTYPE ("LANGEVIN DYNAMICS OPTIONS");
 +    CTYPE ("Friction coefficient (amu/ps) and random seed");
 +    RTYPE ("bd-fric",     ir->bd_fric,    0.0);
 +    ITYPE ("ld-seed",     ir->ld_seed,    1993);
 +
 +    /* Em stuff */
 +    CCTYPE ("ENERGY MINIMIZATION OPTIONS");
 +    CTYPE ("Force tolerance and initial step-size");
 +    RTYPE ("emtol",       ir->em_tol,     10.0);
 +    RTYPE ("emstep",      ir->em_stepsize, 0.01);
 +    CTYPE ("Max number of iterations in relax-shells");
 +    ITYPE ("niter",       ir->niter,      20);
 +    CTYPE ("Step size (ps^2) for minimization of flexible constraints");
 +    RTYPE ("fcstep",      ir->fc_stepsize, 0);
 +    CTYPE ("Frequency of steepest descents steps when doing CG");
 +    ITYPE ("nstcgsteep",  ir->nstcgsteep, 1000);
 +    ITYPE ("nbfgscorr",   ir->nbfgscorr,  10);
 +
 +    CCTYPE ("TEST PARTICLE INSERTION OPTIONS");
 +    RTYPE ("rtpi",    ir->rtpi,   0.05);
 +
 +    /* Output options */
 +    CCTYPE ("OUTPUT CONTROL OPTIONS");
 +    CTYPE ("Output frequency for coords (x), velocities (v) and forces (f)");
 +    ITYPE ("nstxout", ir->nstxout,    0);
 +    ITYPE ("nstvout", ir->nstvout,    0);
 +    ITYPE ("nstfout", ir->nstfout,    0);
 +    ir->nstcheckpoint = 1000; /* fixed value: the "nstcheckpoint" option is removed as deprecated above */
 +    CTYPE ("Output frequency for energies to log file and energy file");
 +    ITYPE ("nstlog",  ir->nstlog, 1000);
 +    ITYPE ("nstcalcenergy", ir->nstcalcenergy, 100);
 +    ITYPE ("nstenergy",   ir->nstenergy,  1000);
 +    CTYPE ("Output frequency and precision for .xtc file");
 +    ITYPE ("nstxtcout",   ir->nstxtcout,  0);
 +    RTYPE ("xtc-precision", ir->xtcprec,   1000.0);
 +    CTYPE ("This selects the subset of atoms for the .xtc file. You can");
 +    CTYPE ("select multiple groups. By default all atoms will be written.");
 +    STYPE ("xtc-grps",    xtc_grps,       NULL);
 +    CTYPE ("Selection of energy groups");
 +    STYPE ("energygrps",  energy,         NULL);
 +
 +    /* Neighbor searching */
 +    CCTYPE ("NEIGHBORSEARCHING PARAMETERS");
 +    CTYPE ("cut-off scheme (group: using charge groups, Verlet: particle based cut-offs)");
 +    EETYPE("cutoff-scheme",     ir->cutoff_scheme,    ecutscheme_names);
 +    CTYPE ("nblist update frequency");
 +    ITYPE ("nstlist", ir->nstlist,    10);
 +    CTYPE ("ns algorithm (simple or grid)");
 +    EETYPE("ns-type",     ir->ns_type,    ens_names);
 +    /* set ndelta to the optimal value of 2 */
 +    ir->ndelta = 2;
 +    CTYPE ("Periodic boundary conditions: xyz, no, xy");
 +    EETYPE("pbc",         ir->ePBC,       epbc_names);
 +    EETYPE("periodic-molecules", ir->bPeriodicMols, yesno_names);
 +    CTYPE ("Allowed energy drift due to the Verlet buffer in kJ/mol/ps per atom,");
 +    CTYPE ("a value of -1 means: use rlist");
 +    RTYPE("verlet-buffer-drift", ir->verletbuf_drift,    0.005);
 +    CTYPE ("nblist cut-off");
 +    RTYPE ("rlist",   ir->rlist,  1.0);
 +    CTYPE ("long-range cut-off for switched potentials");
 +    RTYPE ("rlistlong",   ir->rlistlong,  -1);
 +    ITYPE ("nstcalclr",   ir->nstcalclr,  -1);
 +
 +    /* Electrostatics */
 +    CCTYPE ("OPTIONS FOR ELECTROSTATICS AND VDW");
 +    CTYPE ("Method for doing electrostatics");
 +    EETYPE("coulombtype", ir->coulombtype,    eel_names);
 +    EETYPE("coulomb-modifier",    ir->coulomb_modifier,    eintmod_names);
 +    CTYPE ("cut-off lengths");
 +    RTYPE ("rcoulomb-switch", ir->rcoulomb_switch,    0.0);
 +    RTYPE ("rcoulomb",    ir->rcoulomb,   1.0);
 +    CTYPE ("Relative dielectric constant for the medium and the reaction field");
 +    RTYPE ("epsilon-r",   ir->epsilon_r,  1.0);
 +    RTYPE ("epsilon-rf",  ir->epsilon_rf, 0.0);
 +    CTYPE ("Method for doing Van der Waals");
 +    EETYPE("vdw-type",    ir->vdwtype,    evdw_names);
 +    EETYPE("vdw-modifier",    ir->vdw_modifier,    eintmod_names);
 +    CTYPE ("cut-off lengths");
 +    RTYPE ("rvdw-switch", ir->rvdw_switch,    0.0);
 +    RTYPE ("rvdw",    ir->rvdw,   1.0);
 +    CTYPE ("Apply long range dispersion corrections for Energy and Pressure");
 +    EETYPE("DispCorr",    ir->eDispCorr,  edispc_names);
 +    CTYPE ("Extension of the potential lookup tables beyond the cut-off");
 +    RTYPE ("table-extension", ir->tabext, 1.0);
 +    CTYPE ("Separate tables between energy group pairs");
 +    STYPE ("energygrp-table", egptable,   NULL);
 +    CTYPE ("Spacing for the PME/PPPM FFT grid");
 +    RTYPE ("fourierspacing", ir->fourier_spacing, 0.12);
 +    CTYPE ("FFT grid size, when a value is 0 fourierspacing will be used");
 +    ITYPE ("fourier-nx",  ir->nkx,         0);
 +    ITYPE ("fourier-ny",  ir->nky,         0);
 +    ITYPE ("fourier-nz",  ir->nkz,         0);
 +    CTYPE ("EWALD/PME/PPPM parameters");
 +    ITYPE ("pme-order",   ir->pme_order,   4);
 +    RTYPE ("ewald-rtol",  ir->ewald_rtol, 0.00001);
 +    EETYPE("ewald-geometry", ir->ewald_geometry, eewg_names);
 +    RTYPE ("epsilon-surface", ir->epsilon_surface, 0.0);
 +    EETYPE("optimize-fft", ir->bOptFFT,  yesno_names);
 +
 +    CCTYPE("IMPLICIT SOLVENT ALGORITHM");
 +    EETYPE("implicit-solvent", ir->implicit_solvent, eis_names);
 +
 +    CCTYPE ("GENERALIZED BORN ELECTROSTATICS");
 +    CTYPE ("Algorithm for calculating Born radii");
 +    EETYPE("gb-algorithm", ir->gb_algorithm, egb_names);
 +    CTYPE ("Frequency of calculating the Born radii inside rlist");
 +    ITYPE ("nstgbradii", ir->nstgbradii, 1);
 +    CTYPE ("Cutoff for Born radii calculation; the contribution from atoms");
 +    CTYPE ("between rlist and rgbradii is updated every nstlist steps");
 +    RTYPE ("rgbradii",  ir->rgbradii, 1.0);
 +    CTYPE ("Dielectric coefficient of the implicit solvent");
 +    RTYPE ("gb-epsilon-solvent", ir->gb_epsilon_solvent, 80.0);
 +    CTYPE ("Salt concentration in M for Generalized Born models");
 +    RTYPE ("gb-saltconc",  ir->gb_saltconc, 0.0);
 +    CTYPE ("Scaling factors used in the OBC GB model. Default values are OBC(II)");
 +    RTYPE ("gb-obc-alpha", ir->gb_obc_alpha, 1.0);
 +    RTYPE ("gb-obc-beta", ir->gb_obc_beta, 0.8);
 +    RTYPE ("gb-obc-gamma", ir->gb_obc_gamma, 4.85);
 +    RTYPE ("gb-dielectric-offset", ir->gb_dielectric_offset, 0.009);
 +    EETYPE("sa-algorithm", ir->sa_algorithm, esa_names);
 +    CTYPE ("Surface tension (kJ/mol/nm^2) for the SA (nonpolar surface) part of GBSA");
 +    CTYPE ("The value -1 will set default value for Still/HCT/OBC GB-models.");
 +    RTYPE ("sa-surface-tension", ir->sa_surface_tension, -1);
 +
 +    /* Coupling stuff */
 +    CCTYPE ("OPTIONS FOR WEAK COUPLING ALGORITHMS");
 +    CTYPE ("Temperature coupling");
 +    EETYPE("tcoupl",  ir->etc,        etcoupl_names);
 +    ITYPE ("nsttcouple", ir->nsttcouple,  -1);
 +    ITYPE("nh-chain-length",     ir->opts.nhchainlength, NHCHAINLENGTH);
 +    EETYPE("print-nose-hoover-chain-variables", ir->bPrintNHChains, yesno_names);
 +    CTYPE ("Groups to couple separately");
 +    STYPE ("tc-grps",     tcgrps,         NULL);
 +    CTYPE ("Time constant (ps) and reference temperature (K)");
 +    STYPE ("tau-t",   tau_t,      NULL);
 +    STYPE ("ref-t",   ref_t,      NULL);
 +    CTYPE ("pressure coupling");
 +    EETYPE("pcoupl",  ir->epc,        epcoupl_names);
 +    EETYPE("pcoupltype",  ir->epct,       epcoupltype_names);
 +    ITYPE ("nstpcouple", ir->nstpcouple,  -1);
 +    CTYPE ("Time constant (ps), compressibility (1/bar) and reference P (bar)");
 +    RTYPE ("tau-p",   ir->tau_p,  1.0);
 +    STYPE ("compressibility", dumstr[0],  NULL);
 +    STYPE ("ref-p",       dumstr[1],      NULL);
 +    CTYPE ("Scaling of reference coordinates, No, All or COM");
 +    EETYPE ("refcoord-scaling", ir->refcoord_scaling, erefscaling_names);
 +
 +    /* QMMM */
 +    CCTYPE ("OPTIONS FOR QMMM calculations");
 +    EETYPE("QMMM", ir->bQMMM, yesno_names);
 +    CTYPE ("Groups treated Quantum Mechanically");
 +    STYPE ("QMMM-grps",  QMMM,          NULL);
 +    CTYPE ("QM method");
 +    STYPE("QMmethod",     QMmethod, NULL);
 +    CTYPE ("QMMM scheme");
 +    EETYPE("QMMMscheme",  ir->QMMMscheme,    eQMMMscheme_names);
 +    CTYPE ("QM basisset");
 +    STYPE("QMbasis",      QMbasis, NULL);
 +    CTYPE ("QM charge");
 +    STYPE ("QMcharge",    QMcharge, NULL);
 +    CTYPE ("QM multiplicity");
 +    STYPE ("QMmult",      QMmult, NULL);
 +    CTYPE ("Surface Hopping");
 +    STYPE ("SH",          bSH, NULL);
 +    CTYPE ("CAS space options");
 +    STYPE ("CASorbitals",      CASorbitals,   NULL);
 +    STYPE ("CASelectrons",     CASelectrons,  NULL);
 +    STYPE ("SAon", SAon, NULL);
 +    STYPE ("SAoff", SAoff, NULL);
 +    STYPE ("SAsteps",  SAsteps, NULL);
 +    CTYPE ("Scale factor for MM charges");
 +    RTYPE ("MMChargeScaleFactor", ir->scalefactor, 1.0);
 +    CTYPE ("Optimization of QM subsystem");
 +    STYPE ("bOPT",          bOPT, NULL);
 +    STYPE ("bTS",          bTS, NULL);
 +
 +    /* Simulated annealing */
 +    CCTYPE("SIMULATED ANNEALING");
 +    CTYPE ("Type of annealing for each temperature group (no/single/periodic)");
 +    STYPE ("annealing",   anneal,      NULL);
 +    CTYPE ("Number of time points to use for specifying annealing in each group");
 +    STYPE ("annealing-npoints", anneal_npoints, NULL);
 +    CTYPE ("List of times at the annealing points for each group");
 +    STYPE ("annealing-time",       anneal_time,       NULL);
 +    CTYPE ("Temp. at each annealing point, for each group.");
 +    STYPE ("annealing-temp",  anneal_temp,  NULL);
 +
 +    /* Startup run */
 +    CCTYPE ("GENERATE VELOCITIES FOR STARTUP RUN");
 +    EETYPE("gen-vel",     opts->bGenVel,  yesno_names);
 +    RTYPE ("gen-temp",    opts->tempi,    300.0);
 +    ITYPE ("gen-seed",    opts->seed,     173529);
 +
 +    /* Shake stuff */
 +    CCTYPE ("OPTIONS FOR BONDS");
 +    EETYPE("constraints", opts->nshake,   constraints);
 +    CTYPE ("Type of constraint algorithm");
 +    EETYPE("constraint-algorithm",  ir->eConstrAlg, econstr_names);
 +    CTYPE ("Do not constrain the start configuration");
 +    EETYPE("continuation", ir->bContinuation, yesno_names);
 +    CTYPE ("Use successive overrelaxation to reduce the number of shake iterations");
 +    EETYPE("Shake-SOR", ir->bShakeSOR, yesno_names);
 +    CTYPE ("Relative tolerance of shake");
 +    RTYPE ("shake-tol", ir->shake_tol, 0.0001);
 +    CTYPE ("Highest order in the expansion of the constraint coupling matrix");
 +    ITYPE ("lincs-order", ir->nProjOrder, 4);
 +    CTYPE ("Number of iterations in the final step of LINCS. 1 is fine for");
 +    CTYPE ("normal simulations, but use 2 to conserve energy in NVE runs.");
 +    CTYPE ("For energy minimization with constraints it should be 4 to 8.");
 +    ITYPE ("lincs-iter", ir->nLincsIter, 1);
 +    CTYPE ("Lincs will write a warning to the stderr if in one step a bond");
 +    CTYPE ("rotates over more degrees than");
 +    RTYPE ("lincs-warnangle", ir->LincsWarnAngle, 30.0);
 +    CTYPE ("Convert harmonic bonds to morse potentials");
 +    EETYPE("morse",       opts->bMorse, yesno_names);
 +
 +    /* Energy group exclusions */
 +    CCTYPE ("ENERGY GROUP EXCLUSIONS");
 +    CTYPE ("Pairs of energy groups for which all non-bonded interactions are excluded");
 +    STYPE ("energygrp-excl", egpexcl,     NULL);
 +
 +    /* Walls */
 +    CCTYPE ("WALLS");
 +    CTYPE ("Number of walls, type, atom types, densities and box-z scale factor for Ewald");
 +    ITYPE ("nwall", ir->nwall, 0);
 +    EETYPE("wall-type",     ir->wall_type,   ewt_names);
 +    RTYPE ("wall-r-linpot", ir->wall_r_linpot, -1);
 +    STYPE ("wall-atomtype", wall_atomtype, NULL);
 +    STYPE ("wall-density",  wall_density,  NULL);
 +    RTYPE ("wall-ewald-zfac", ir->wall_ewald_zfac, 3);
 +
 +    /* COM pulling */
 +    CCTYPE("COM PULLING");
 +    CTYPE("Pull type: no, umbrella, constraint or constant-force");
 +    EETYPE("pull",          ir->ePull, epull_names);
 +    if (ir->ePull != epullNO)
 +    {
 +        snew(ir->pull, 1);
 +        pull_grp = read_pullparams(&ninp, &inp, ir->pull, &opts->pull_start, wi);
 +    }
 +
 +    /* Enforced rotation */
 +    CCTYPE("ENFORCED ROTATION");
 +    CTYPE("Enforced rotation: No or Yes");
 +    EETYPE("rotation",       ir->bRot, yesno_names);
 +    if (ir->bRot)
 +    {
 +        snew(ir->rot, 1);
 +        rot_grp = read_rotparams(&ninp, &inp, ir->rot, wi);
 +    }
 +
 +    /* Refinement */
 +    CCTYPE("NMR refinement stuff");
 +    CTYPE ("Distance restraints type: No, Simple or Ensemble");
 +    EETYPE("disre",       ir->eDisre,     edisre_names);
 +    CTYPE ("Force weighting of pairs in one distance restraint: Conservative or Equal");
 +    EETYPE("disre-weighting", ir->eDisreWeighting, edisreweighting_names);
 +    CTYPE ("Use sqrt of the time averaged times the instantaneous violation");
 +    EETYPE("disre-mixed", ir->bDisreMixed, yesno_names);
 +    RTYPE ("disre-fc",    ir->dr_fc,  1000.0);
 +    RTYPE ("disre-tau",   ir->dr_tau, 0.0);
 +    CTYPE ("Output frequency for pair distances to energy file");
 +    ITYPE ("nstdisreout", ir->nstdisreout, 100);
 +    CTYPE ("Orientation restraints: No or Yes");
 +    EETYPE("orire",       opts->bOrire,   yesno_names);
 +    CTYPE ("Orientation restraints force constant and tau for time averaging");
 +    RTYPE ("orire-fc",    ir->orires_fc,  0.0);
 +    RTYPE ("orire-tau",   ir->orires_tau, 0.0);
 +    STYPE ("orire-fitgrp", orirefitgrp,    NULL);
 +    CTYPE ("Output frequency for trace(SD) and S to energy file");
 +    ITYPE ("nstorireout", ir->nstorireout, 100);
 +
 +    /* free energy variables */
 +    CCTYPE ("Free energy variables");
 +    EETYPE("free-energy", ir->efep, efep_names);
 +    STYPE ("couple-moltype",  couple_moltype,  NULL);
 +    EETYPE("couple-lambda0", opts->couple_lam0, couple_lam);
 +    EETYPE("couple-lambda1", opts->couple_lam1, couple_lam);
 +    EETYPE("couple-intramol", opts->bCoupleIntra, yesno_names);
 +
 +    RTYPE ("init-lambda", fep->init_lambda, -1); /* start with -1 so
 +                                                    we can recognize if
 +                                                    it was not entered */
 +    ITYPE ("init-lambda-state", fep->init_fep_state, -1);
 +    RTYPE ("delta-lambda", fep->delta_lambda, 0.0);
 +    ITYPE ("nstdhdl", fep->nstdhdl, 50);
 +    STYPE ("fep-lambdas", fep_lambda[efptFEP], NULL);
 +    STYPE ("mass-lambdas", fep_lambda[efptMASS], NULL);
 +    STYPE ("coul-lambdas", fep_lambda[efptCOUL], NULL);
 +    STYPE ("vdw-lambdas", fep_lambda[efptVDW], NULL);
 +    STYPE ("bonded-lambdas", fep_lambda[efptBONDED], NULL);
 +    STYPE ("restraint-lambdas", fep_lambda[efptRESTRAINT], NULL);
 +    STYPE ("temperature-lambdas", fep_lambda[efptTEMPERATURE], NULL);
 +    ITYPE ("calc-lambda-neighbors", fep->lambda_neighbors, 1);
 +    STYPE ("init-lambda-weights", lambda_weights, NULL);
 +    EETYPE("dhdl-print-energy", fep->bPrintEnergy, yesno_names);
 +    RTYPE ("sc-alpha", fep->sc_alpha, 0.0);
 +    ITYPE ("sc-power", fep->sc_power, 1);
 +    RTYPE ("sc-r-power", fep->sc_r_power, 6.0);
 +    RTYPE ("sc-sigma", fep->sc_sigma, 0.3);
 +    EETYPE("sc-coul", fep->bScCoul, yesno_names);
 +    ITYPE ("dh_hist_size", fep->dh_hist_size, 0);
 +    RTYPE ("dh_hist_spacing", fep->dh_hist_spacing, 0.1);
 +    EETYPE("separate-dhdl-file", fep->separate_dhdl_file,
 +           separate_dhdl_file_names);
 +    EETYPE("dhdl-derivatives", fep->dhdl_derivatives, dhdl_derivatives_names);
 +    ITYPE ("dh_hist_size", fep->dh_hist_size, 0); /* NOTE(review): dh_hist_size and dh_hist_spacing are already read a few lines above; this second pair looks redundant -- confirm before removing */
 +    RTYPE ("dh_hist_spacing", fep->dh_hist_spacing, 0.1);
 +
 +    /* Non-equilibrium MD stuff */
 +    CCTYPE("Non-equilibrium MD stuff");
 +    STYPE ("acc-grps",    accgrps,        NULL);
 +    STYPE ("accelerate",  acc,            NULL);
 +    STYPE ("freezegrps",  freeze,         NULL);
 +    STYPE ("freezedim",   frdim,          NULL);
 +    RTYPE ("cos-acceleration", ir->cos_accel, 0);
 +    STYPE ("deform",      deform,         NULL);
 +
 +    /* simulated tempering variables */
 +    CCTYPE("simulated tempering variables");
 +    EETYPE("simulated-tempering", ir->bSimTemp, yesno_names);
 +    EETYPE("simulated-tempering-scaling", ir->simtempvals->eSimTempScale, esimtemp_names);
 +    RTYPE("sim-temp-low", ir->simtempvals->simtemp_low, 300.0);
 +    RTYPE("sim-temp-high", ir->simtempvals->simtemp_high, 300.0);
 +
 +    /* expanded ensemble variables */
 +    if (ir->efep == efepEXPANDED || ir->bSimTemp)
 +    {
 +        read_expandedparams(&ninp, &inp, expand, wi);
 +    }
 +
 +    /* Electric fields */
 +    CCTYPE("Electric fields");
 +    CTYPE ("Format is number of terms (int) and for all terms an amplitude (real)");
 +    CTYPE ("and a phase angle (real)");
 +    STYPE ("E-x",     efield_x,   NULL);
 +    STYPE ("E-xt",    efield_xt,  NULL);
 +    STYPE ("E-y",     efield_y,   NULL);
 +    STYPE ("E-yt",    efield_yt,  NULL);
 +    STYPE ("E-z",     efield_z,   NULL);
 +    STYPE ("E-zt",    efield_zt,  NULL);
 +
 +    /* AdResS defined thingies */
 +    CCTYPE ("AdResS parameters");
 +    EETYPE("adress",       ir->bAdress, yesno_names);
 +    if (ir->bAdress)
 +    {
 +        snew(ir->adress, 1);
 +        read_adressparams(&ninp, &inp, ir->adress, wi);
 +    }
 +
 +    /* User defined thingies */
 +    CCTYPE ("User defined thingies");
 +    STYPE ("user1-grps",  user1,          NULL);
 +    STYPE ("user2-grps",  user2,          NULL);
 +    ITYPE ("userint1",    ir->userint1,   0);
 +    ITYPE ("userint2",    ir->userint2,   0);
 +    ITYPE ("userint3",    ir->userint3,   0);
 +    ITYPE ("userint4",    ir->userint4,   0);
 +    RTYPE ("userreal1",   ir->userreal1,  0);
 +    RTYPE ("userreal2",   ir->userreal2,  0);
 +    RTYPE ("userreal3",   ir->userreal3,  0);
 +    RTYPE ("userreal4",   ir->userreal4,  0);
 +#undef CTYPE
 +    /* Write the option list to mdparout, then free the name and value of
 +     * every entry and the entry array itself; inp is not used after this.
 +     */
 +    write_inpfile(mdparout, ninp, inp, FALSE, wi);
 +    for (i = 0; (i < ninp); i++)
 +    {
 +        sfree(inp[i].name);
 +        sfree(inp[i].value);
 +    }
 +    sfree(inp);
 +
 +    /* Process options if necessary */
 +    for (m = 0; m < 2; m++) /* m = 0: compressibility string, m = 1: ref-p string */
 +    {
 +        for (i = 0; i < 2*DIM; i++)
 +        {
 +            dumdub[m][i] = 0.0;
 +        }
 +        if (ir->epc)
 +        {
 +            switch (ir->epct)
 +            {
 +                case epctISOTROPIC:
 +                    if (sscanf(dumstr[m], "%lf", &(dumdub[m][XX])) != 1)
 +                    {
 +                        warning_error(wi, "Pressure coupling not enough values (I need 1)");
 +                    }
 +                    dumdub[m][YY] = dumdub[m][ZZ] = dumdub[m][XX];
 +                    break;
 +                case epctSEMIISOTROPIC:
 +                case epctSURFACETENSION:
 +                    if (sscanf(dumstr[m], "%lf%lf",
 +                               &(dumdub[m][XX]), &(dumdub[m][ZZ])) != 2)
 +                    {
 +                        warning_error(wi, "Pressure coupling not enough values (I need 2)");
 +                    }
 +                    dumdub[m][YY] = dumdub[m][XX];
 +                    break;
 +                case epctANISOTROPIC:
 +                    if (sscanf(dumstr[m], "%lf%lf%lf%lf%lf%lf",
 +                               &(dumdub[m][XX]), &(dumdub[m][YY]), &(dumdub[m][ZZ]),
 +                               &(dumdub[m][3]), &(dumdub[m][4]), &(dumdub[m][5])) != 6)
 +                    {
 +                        warning_error(wi, "Pressure coupling not enough values (I need 6)");
 +                    }
 +                    break;
 +                default:
 +                    gmx_fatal(FARGS, "Pressure coupling type %s not implemented yet",
 +                              epcoupltype_names[ir->epct]);
 +            }
 +        }
 +    }
 +    clear_mat(ir->ref_p);
 +    clear_mat(ir->compress);
 +    for (i = 0; i < DIM; i++) /* the first DIM parsed values go on the diagonal */
 +    {
 +        ir->ref_p[i][i]    = dumdub[1][i];
 +        ir->compress[i][i] = dumdub[0][i];
 +    }
 +    if (ir->epct == epctANISOTROPIC)
 +    {
 +        ir->ref_p[XX][YY] = dumdub[1][3];
 +        ir->ref_p[XX][ZZ] = dumdub[1][4];
 +        ir->ref_p[YY][ZZ] = dumdub[1][5];
 +        if (ir->ref_p[XX][YY] != 0 && ir->ref_p[XX][ZZ] != 0 && ir->ref_p[YY][ZZ] != 0)
 +        {
 +            warning(wi, "All off-diagonal reference pressures are non-zero. Are you sure you want to apply a threefold shear stress?\n");
 +        }
 +        ir->compress[XX][YY] = dumdub[0][3];
 +        ir->compress[XX][ZZ] = dumdub[0][4];
 +        ir->compress[YY][ZZ] = dumdub[0][5];
 +        for (i = 0; i < DIM; i++) /* mirror the upper triangle into the lower one */
 +        {
 +            for (m = 0; m < i; m++)
 +            {
 +                ir->ref_p[i][m]    = ir->ref_p[m][i];
 +                ir->compress[i][m] = ir->compress[m][i];
 +            }
 +        }
 +    }
 +    /* With comm-mode set to ecmNO the nstcomm interval is forced to 0 */
 +    if (ir->comm_mode == ecmNO)
 +    {
 +        ir->nstcomm = 0;
 +    }
 +
 +    opts->couple_moltype = NULL;
 +    if (strlen(couple_moltype) > 0)
 +    {
 +        if (ir->efep != efepNO)
 +        {
 +            opts->couple_moltype = strdup(couple_moltype);
 +            if (opts->couple_lam0 == opts->couple_lam1)
 +            {
 +                warning(wi, "The lambda=0 and lambda=1 states for coupling are identical");
 +            }
 +            if (ir->eI == eiMD && (opts->couple_lam0 == ecouplamNONE ||
 +                                   opts->couple_lam1 == ecouplamNONE))
 +            {
 +                warning(wi, "For proper sampling of the (nearly) decoupled state, stochastic dynamics should be used");
 +            }
 +        }
 +        else
 +        {
 +            warning(wi, "Can not couple a molecule with free_energy = no");
 +        }
 +    }
 +    /* FREE ENERGY AND EXPANDED ENSEMBLE OPTIONS */
 +    if (ir->efep != efepNO)
 +    {
 +        if (fep->delta_lambda > 0) /* NOTE(review): only a positive delta-lambda switches to slow growth here */
 +        {
 +            ir->efep = efepSLOWGROWTH;
 +        }
 +    }
 +
 +    if (ir->bSimTemp)
 +    {
 +        fep->bPrintEnergy = TRUE;
 +        /* always print out the energy to dhdl if we are doing expanded ensemble, since we need the total energy
 +           if the temperature is changing. */
 +    }
 +
 +    if ((ir->efep != efepNO) || ir->bSimTemp)
 +    {
 +        ir->bExpanded = FALSE;
 +        if ((ir->efep == efepEXPANDED) || ir->bSimTemp)
 +        {
 +            ir->bExpanded = TRUE;
 +        }
 +        do_fep_params(ir, fep_lambda, lambda_weights);
 +        if (ir->bSimTemp) /* done after fep params */
 +        {
 +            do_simtemp_params(ir);
 +        }
 +    }
 +    else
 +    {
 +        ir->fepvals->n_lambda = 0;
 +    }
 +
 +    /* WALL PARAMETERS */
 +
 +    do_wall_params(ir, wall_atomtype, wall_density, opts);
 +
 +    /* ORIENTATION RESTRAINT PARAMETERS */
 +
 +    if (opts->bOrire && str_nelem(orirefitgrp, MAXPTR, NULL) != 1)
 +    {
 +        warning_error(wi, "ERROR: Need one orientation restraint fit group\n");
 +    }
 +
 +    /* DEFORMATION PARAMETERS */
 +
 +    clear_mat(ir->deform);
 +    for (i = 0; i < 6; i++)
 +    {
 +        dumdub[0][i] = 0;
 +    }
 +    m = sscanf(deform, "%lf %lf %lf %lf %lf %lf",
 +               &(dumdub[0][0]), &(dumdub[0][1]), &(dumdub[0][2]),
 +               &(dumdub[0][3]), &(dumdub[0][4]), &(dumdub[0][5])); /* NOTE(review): the field count in m is never checked; unread fields keep the 0 set above */
 +    for (i = 0; i < 3; i++)
 +    {
 +        ir->deform[i][i] = dumdub[0][i];
 +    }
 +    ir->deform[YY][XX] = dumdub[0][3]; /* values 4-6 fill the lower triangle */
 +    ir->deform[ZZ][XX] = dumdub[0][4];
 +    ir->deform[ZZ][YY] = dumdub[0][5];
 +    if (ir->epc != epcNO)
 +    {
 +        for (i = 0; i < 3; i++)
 +        {
 +            for (j = 0; j <= i; j++)
 +            {
 +                if (ir->deform[i][j] != 0 && ir->compress[i][j] != 0)
 +                {
 +                    warning_error(wi, "A box element has deform set and compressibility > 0");
 +                }
 +            }
 +        }
 +        for (i = 0; i < 3; i++)
 +        {
 +            for (j = 0; j < i; j++)
 +            {
 +                if (ir->deform[i][j] != 0)
 +                {
 +                    for (m = j; m < DIM; m++)
 +                    {
 +                        if (ir->compress[m][j] != 0)
 +                        {
 +                            sprintf(warn_buf, "An off-diagonal box element has deform set while compressibility > 0 for the same component of another box vector, this might lead to spurious periodicity effects.");
 +                            warning(wi, warn_buf);
 +                        }
 +                    }
 +                }
 +            }
 +        }
 +    }
 +
 +    sfree(dumstr[0]);
 +    sfree(dumstr[1]);
 +}
 +
 +static int search_QMstring(char *s, int ng, const char *gn[])
 +{
 +    /* same as normal search_string, but this one searches QM strings */
 +    int i;
 +
 +    for (i = 0; (i < ng); i++)
 +    {
 +        if (gmx_strcasecmp(s, gn[i]) == 0)
 +        {
 +            return i;
 +        }
 +    }
 +
 +    gmx_fatal(FARGS, "this QM method or basisset (%s) is not implemented\n!", s);
 +
 +    return -1;
 +
 +} /* search_QMstring */
 +
 +
 +int search_string(char *s, int ng, char *gn[])
 +{
 +    int i;
 +
 +    for (i = 0; (i < ng); i++)
 +    {
 +        if (gmx_strcasecmp(s, gn[i]) == 0)
 +        {
 +            return i;
 +        }
 +    }
 +
 +    gmx_fatal(FARGS,
 +              "Group %s referenced in the .mdp file was not found in the index file.\n"
 +              "Group names must match either [moleculetype] names or custom index group\n"
 +              "names, in which case you must supply an index file to the '-n' option\n"
 +              "of grompp.",
 +              s);
 +
 +    return -1;
 +}
 +
 +static gmx_bool do_numbering(int natoms, gmx_groups_t *groups, int ng, char *ptrs[],
 +                             t_blocka *block, char *gnames[],
 +                             int gtype, int restnm,
 +                             int grptp, gmx_bool bVerbose,
 +                             warninp_t wi)
 +{
 +    unsigned short *cbuf;
 +    t_grps         *grps = &(groups->grps[gtype]);
 +    int             i, j, gid, aj, ognr, ntot = 0;
 +    const char     *title;
 +    gmx_bool        bRest;
 +    char            warn_buf[STRLEN];
 +
 +    if (debug)
 +    {
 +        fprintf(debug, "Starting numbering %d groups of type %d\n", ng, gtype);
 +    }
 +
 +    title = gtypes[gtype];
 +
 +    snew(cbuf, natoms);
 +    /* Mark all id's as not set */
 +    for (i = 0; (i < natoms); i++)
 +    {
 +        cbuf[i] = NOGID;
 +    }
 +
 +    snew(grps->nm_ind, ng+1); /* +1 for possible rest group */
 +    for (i = 0; (i < ng); i++)
 +    {
 +        /* Lookup the group name in the block structure */
 +        gid = search_string(ptrs[i], block->nr, gnames);
 +        if ((grptp != egrptpONE) || (i == 0))
 +        {
 +            grps->nm_ind[grps->nr++] = gid;
 +        }
 +        if (debug)
 +        {
 +            fprintf(debug, "Found gid %d for group %s\n", gid, ptrs[i]);
 +        }
 +
 +        /* Now go over the atoms in the group */
 +        for (j = block->index[gid]; (j < block->index[gid+1]); j++)
 +        {
 +
 +            aj = block->a[j];
 +
 +            /* Range checking */
 +            if ((aj < 0) || (aj >= natoms))
 +            {
 +                gmx_fatal(FARGS, "Invalid atom number %d in indexfile", aj);
 +            }
 +            /* Lookup up the old group number */
 +            ognr = cbuf[aj];
 +            if (ognr != NOGID)
 +            {
 +                gmx_fatal(FARGS, "Atom %d in multiple %s groups (%d and %d)",
 +                          aj+1, title, ognr+1, i+1);
 +            }
 +            else
 +            {
 +                /* Store the group number in buffer */
 +                if (grptp == egrptpONE)
 +                {
 +                    cbuf[aj] = 0;
 +                }
 +                else
 +                {
 +                    cbuf[aj] = i;
 +                }
 +                ntot++;
 +            }
 +        }
 +    }
 +
 +    /* Now check whether we have done all atoms */
 +    bRest = FALSE;
 +    if (ntot != natoms)
 +    {
 +        if (grptp == egrptpALL)
 +        {
 +            gmx_fatal(FARGS, "%d atoms are not part of any of the %s groups",
 +                      natoms-ntot, title);
 +        }
 +        else if (grptp == egrptpPART)
 +        {
 +            sprintf(warn_buf, "%d atoms are not part of any of the %s groups",
 +                    natoms-ntot, title);
 +            warning_note(wi, warn_buf);
 +        }
 +        /* Assign all atoms currently unassigned to a rest group */
 +        for (j = 0; (j < natoms); j++)
 +        {
 +            if (cbuf[j] == NOGID)
 +            {
 +                cbuf[j] = grps->nr;
 +                bRest   = TRUE;
 +            }
 +        }
 +        if (grptp != egrptpPART)
 +        {
 +            if (bVerbose)
 +            {
 +                fprintf(stderr,
 +                        "Making dummy/rest group for %s containing %d elements\n",
 +                        title, natoms-ntot);
 +            }
 +            /* Add group name "rest" */
 +            grps->nm_ind[grps->nr] = restnm;
 +
 +            /* Assign the rest name to all atoms not currently assigned to a group */
 +            for (j = 0; (j < natoms); j++)
 +            {
 +                if (cbuf[j] == NOGID)
 +                {
 +                    cbuf[j] = grps->nr;
 +                }
 +            }
 +            grps->nr++;
 +        }
 +    }
 +
 +    if (grps->nr == 1 && (ntot == 0 || ntot == natoms))
 +    {
 +        /* All atoms are part of one (or no) group, no index required */
 +        groups->ngrpnr[gtype] = 0;
 +        groups->grpnr[gtype]  = NULL;
 +    }
 +    else
 +    {
 +        groups->ngrpnr[gtype] = natoms;
 +        snew(groups->grpnr[gtype], natoms);
 +        for (j = 0; (j < natoms); j++)
 +        {
 +            groups->grpnr[gtype][j] = cbuf[j];
 +        }
 +    }
 +
 +    sfree(cbuf);
 +
 +    return (bRest && grptp == egrptpPART);
 +}
 +
 +static void calc_nrdf(gmx_mtop_t *mtop, t_inputrec *ir, char **gnames)
 +{
 +    t_grpopts              *opts;
 +    gmx_groups_t           *groups;
 +    t_pull                 *pull;
 +    int                     natoms, ai, aj, i, j, d, g, imin, jmin, nc;
 +    t_iatom                *ia;
 +    int                    *nrdf2, *na_vcm, na_tot;
 +    double                 *nrdf_tc, *nrdf_vcm, nrdf_uc, n_sub = 0;
 +    gmx_mtop_atomloop_all_t aloop;
 +    t_atom                 *atom;
 +    int                     mb, mol, ftype, as;
 +    gmx_molblock_t         *molb;
 +    gmx_moltype_t          *molt;
 +
 +    /* Calculate nrdf.
 +     * First calc 3xnr-atoms for each group
 +     * then subtract half a degree of freedom for each constraint
 +     *
 +     * Only atoms and nuclei contribute to the degrees of freedom...
 +     */
 +
 +    opts = &ir->opts;
 +
 +    groups = &mtop->groups;
 +    natoms = mtop->natoms;
 +
 +    /* Allocate one more for a possible rest group */
 +    /* We need to sum degrees of freedom into doubles,
 +     * since floats give too low nrdf's above 3 million atoms.
 +     */
 +    snew(nrdf_tc, groups->grps[egcTC].nr+1);
 +    snew(nrdf_vcm, groups->grps[egcVCM].nr+1);
 +    snew(na_vcm, groups->grps[egcVCM].nr+1);
 +
 +    for (i = 0; i < groups->grps[egcTC].nr; i++)
 +    {
 +        nrdf_tc[i] = 0;
 +    }
 +    for (i = 0; i < groups->grps[egcVCM].nr+1; i++)
 +    {
 +        nrdf_vcm[i] = 0;
 +    }
 +
 +    snew(nrdf2, natoms);
 +    aloop = gmx_mtop_atomloop_all_init(mtop);
 +    while (gmx_mtop_atomloop_all_next(aloop, &i, &atom))
 +    {
 +        nrdf2[i] = 0;
 +        if (atom->ptype == eptAtom || atom->ptype == eptNucleus)
 +        {
 +            g = ggrpnr(groups, egcFREEZE, i);
 +            /* Double count nrdf for particle i */
 +            for (d = 0; d < DIM; d++)
 +            {
 +                if (opts->nFreeze[g][d] == 0)
 +                {
 +                    nrdf2[i] += 2;
 +                }
 +            }
 +            nrdf_tc [ggrpnr(groups, egcTC, i)]  += 0.5*nrdf2[i];
 +            nrdf_vcm[ggrpnr(groups, egcVCM, i)] += 0.5*nrdf2[i];
 +        }
 +    }
 +
 +    as = 0;
 +    for (mb = 0; mb < mtop->nmolblock; mb++)
 +    {
 +        molb = &mtop->molblock[mb];
 +        molt = &mtop->moltype[molb->type];
 +        atom = molt->atoms.atom;
 +        for (mol = 0; mol < molb->nmol; mol++)
 +        {
 +            for (ftype = F_CONSTR; ftype <= F_CONSTRNC; ftype++)
 +            {
 +                ia = molt->ilist[ftype].iatoms;
 +                for (i = 0; i < molt->ilist[ftype].nr; )
 +                {
 +                    /* Subtract degrees of freedom for the constraints,
 +                     * if the particles still have degrees of freedom left.
 +                     * If one of the particles is a vsite or a shell, then all
 +                     * constraint motion will go there, but since they do not
 +                     * contribute to the constraints the degrees of freedom do not
 +                     * change.
 +                     */
 +                    ai = as + ia[1];
 +                    aj = as + ia[2];
 +                    if (((atom[ia[1]].ptype == eptNucleus) ||
 +                         (atom[ia[1]].ptype == eptAtom)) &&
 +                        ((atom[ia[2]].ptype == eptNucleus) ||
 +                         (atom[ia[2]].ptype == eptAtom)))
 +                    {
 +                        if (nrdf2[ai] > 0)
 +                        {
 +                            jmin = 1;
 +                        }
 +                        else
 +                        {
 +                            jmin = 2;
 +                        }
 +                        if (nrdf2[aj] > 0)
 +                        {
 +                            imin = 1;
 +                        }
 +                        else
 +                        {
 +                            imin = 2;
 +                        }
 +                        imin       = min(imin, nrdf2[ai]);
 +                        jmin       = min(jmin, nrdf2[aj]);
 +                        nrdf2[ai] -= imin;
 +                        nrdf2[aj] -= jmin;
 +                        nrdf_tc [ggrpnr(groups, egcTC, ai)]  -= 0.5*imin;
 +                        nrdf_tc [ggrpnr(groups, egcTC, aj)]  -= 0.5*jmin;
 +                        nrdf_vcm[ggrpnr(groups, egcVCM, ai)] -= 0.5*imin;
 +                        nrdf_vcm[ggrpnr(groups, egcVCM, aj)] -= 0.5*jmin;
 +                    }
 +                    ia += interaction_function[ftype].nratoms+1;
 +                    i  += interaction_function[ftype].nratoms+1;
 +                }
 +            }
 +            ia = molt->ilist[F_SETTLE].iatoms;
 +            for (i = 0; i < molt->ilist[F_SETTLE].nr; )
 +            {
 +                /* Subtract 1 dof from every atom in the SETTLE */
 +                for (j = 0; j < 3; j++)
 +                {
 +                    ai         = as + ia[1+j];
 +                    imin       = min(2, nrdf2[ai]);
 +                    nrdf2[ai] -= imin;
 +                    nrdf_tc [ggrpnr(groups, egcTC, ai)]  -= 0.5*imin;
 +                    nrdf_vcm[ggrpnr(groups, egcVCM, ai)] -= 0.5*imin;
 +                }
 +                ia += 4;
 +                i  += 4;
 +            }
 +            as += molt->atoms.nr;
 +        }
 +    }
 +
 +    if (ir->ePull == epullCONSTRAINT)
 +    {
 +        /* Correct nrdf for the COM constraints.
 +         * We correct using the TC and VCM group of the first atom
 +         * in the reference and pull group. If atoms in one pull group
 +         * belong to different TC or VCM groups it is anyhow difficult
 +         * to determine the optimal nrdf assignment.
 +         */
 +        pull = ir->pull;
 +        if (pull->eGeom == epullgPOS)
 +        {
 +            nc = 0;
 +            for (i = 0; i < DIM; i++)
 +            {
 +                if (pull->dim[i])
 +                {
 +                    nc++;
 +                }
 +            }
 +        }
 +        else
 +        {
 +            nc = 1;
 +        }
 +        for (i = 0; i < pull->ngrp; i++)
 +        {
 +            imin = 2*nc;
 +            if (pull->grp[0].nat > 0)
 +            {
 +                /* Subtract 1/2 dof from the reference group */
 +                ai = pull->grp[0].ind[0];
 +                if (nrdf_tc[ggrpnr(groups, egcTC, ai)] > 1)
 +                {
 +                    nrdf_tc [ggrpnr(groups, egcTC, ai)]  -= 0.5;
 +                    nrdf_vcm[ggrpnr(groups, egcVCM, ai)] -= 0.5;
 +                    imin--;
 +                }
 +            }
 +            /* Subtract 1/2 dof from the pulled group */
 +            ai = pull->grp[1+i].ind[0];
 +            nrdf_tc [ggrpnr(groups, egcTC, ai)]  -= 0.5*imin;
 +            nrdf_vcm[ggrpnr(groups, egcVCM, ai)] -= 0.5*imin;
 +            if (nrdf_tc[ggrpnr(groups, egcTC, ai)] < 0)
 +            {
 +                gmx_fatal(FARGS, "Center of mass pulling constraints caused the number of degrees of freedom for temperature coupling group %s to be negative", gnames[groups->grps[egcTC].nm_ind[ggrpnr(groups, egcTC, ai)]]);
 +            }
 +        }
 +    }
 +
 +    if (ir->nstcomm != 0)
 +    {
 +        /* Subtract 3 from the number of degrees of freedom in each vcm group
 +         * when com translation is removed and 6 when rotation is removed
 +         * as well.
 +         */
 +        switch (ir->comm_mode)
 +        {
 +            case ecmLINEAR:
 +                n_sub = ndof_com(ir);
 +                break;
 +            case ecmANGULAR:
 +                n_sub = 6;
 +                break;
 +            default:
 +                n_sub = 0;
 +                gmx_incons("Checking comm_mode");
 +        }
 +
 +        for (i = 0; i < groups->grps[egcTC].nr; i++)
 +        {
 +            /* Count the number of atoms of TC group i for every VCM group */
 +            for (j = 0; j < groups->grps[egcVCM].nr+1; j++)
 +            {
 +                na_vcm[j] = 0;
 +            }
 +            na_tot = 0;
 +            for (ai = 0; ai < natoms; ai++)
 +            {
 +                if (ggrpnr(groups, egcTC, ai) == i)
 +                {
 +                    na_vcm[ggrpnr(groups, egcVCM, ai)]++;
 +                    na_tot++;
 +                }
 +            }
 +            /* Correct for VCM removal according to the fraction of each VCM
 +             * group present in this TC group.
 +             */
 +            nrdf_uc = nrdf_tc[i];
 +            if (debug)
 +            {
 +                fprintf(debug, "T-group[%d] nrdf_uc = %g, n_sub = %g\n",
 +                        i, nrdf_uc, n_sub);
 +            }
 +            nrdf_tc[i] = 0;
 +            for (j = 0; j < groups->grps[egcVCM].nr+1; j++)
 +            {
 +                if (nrdf_vcm[j] > n_sub)
 +                {
 +                    nrdf_tc[i] += nrdf_uc*((double)na_vcm[j]/(double)na_tot)*
 +                        (nrdf_vcm[j] - n_sub)/nrdf_vcm[j];
 +                }
 +                if (debug)
 +                {
 +                    fprintf(debug, "  nrdf_vcm[%d] = %g, nrdf = %g\n",
 +                            j, nrdf_vcm[j], nrdf_tc[i]);
 +                }
 +            }
 +        }
 +    }
 +    for (i = 0; (i < groups->grps[egcTC].nr); i++)
 +    {
 +        opts->nrdf[i] = nrdf_tc[i];
 +        if (opts->nrdf[i] < 0)
 +        {
 +            opts->nrdf[i] = 0;
 +        }
 +        fprintf(stderr,
 +                "Number of degrees of freedom in T-Coupling group %s is %.2f\n",
 +                gnames[groups->grps[egcTC].nm_ind[i]], opts->nrdf[i]);
 +    }
 +
 +    sfree(nrdf2);
 +    sfree(nrdf_tc);
 +    sfree(nrdf_vcm);
 +    sfree(na_vcm);
 +}
 +
 +static void decode_cos(char *s, t_cosines *cosine, gmx_bool bTime)
 +{
 +    char   *t;
 +    char    format[STRLEN], f1[STRLEN];
 +    double  a, phi;
 +    int     i;
 +
 +    t = strdup(s);
 +    trim(t);
 +
 +    cosine->n   = 0;
 +    cosine->a   = NULL;
 +    cosine->phi = NULL;
 +    if (strlen(t))
 +    {
 +        sscanf(t, "%d", &(cosine->n));
 +        if (cosine->n <= 0)
 +        {
 +            cosine->n = 0;
 +        }
 +        else
 +        {
 +            snew(cosine->a, cosine->n);
 +            snew(cosine->phi, cosine->n);
 +
 +            sprintf(format, "%%*d");
 +            for (i = 0; (i < cosine->n); i++)
 +            {
 +                strcpy(f1, format);
 +                strcat(f1, "%lf%lf");
 +                if (sscanf(t, f1, &a, &phi) < 2)
 +                {
 +                    gmx_fatal(FARGS, "Invalid input for electric field shift: '%s'", t);
 +                }
 +                cosine->a[i]   = a;
 +                cosine->phi[i] = phi;
 +                strcat(format, "%*lf%*lf");
 +            }
 +        }
 +    }
 +    sfree(t);
 +}
 +
 +static gmx_bool do_egp_flag(t_inputrec *ir, gmx_groups_t *groups,
 +                            const char *option, const char *val, int flag)
 +{
 +    /* The maximum number of energy group pairs would be MAXPTR*(MAXPTR+1)/2.
 +     * But since this is much larger than STRLEN, such a line can not be parsed.
 +     * The real maximum is the number of names that fit in a string: STRLEN/2.
 +     */
 +#define EGP_MAX (STRLEN/2)
 +    int      nelem, i, j, k, nr;
 +    char    *names[EGP_MAX];
 +    char  ***gnames;
 +    gmx_bool bSet;
 +
 +    gnames = groups->grpname;
 +
 +    nelem = str_nelem(val, EGP_MAX, names);
 +    if (nelem % 2 != 0)
 +    {
 +        gmx_fatal(FARGS, "The number of groups for %s is odd", option);
 +    }
 +    nr   = groups->grps[egcENER].nr;
 +    bSet = FALSE;
 +    for (i = 0; i < nelem/2; i++)
 +    {
 +        j = 0;
 +        while ((j < nr) &&
 +               gmx_strcasecmp(names[2*i], *(gnames[groups->grps[egcENER].nm_ind[j]])))
 +        {
 +            j++;
 +        }
 +        if (j == nr)
 +        {
 +            gmx_fatal(FARGS, "%s in %s is not an energy group\n",
 +                      names[2*i], option);
 +        }
 +        k = 0;
 +        while ((k < nr) &&
 +               gmx_strcasecmp(names[2*i+1], *(gnames[groups->grps[egcENER].nm_ind[k]])))
 +        {
 +            k++;
 +        }
 +        if (k == nr)
 +        {
 +            gmx_fatal(FARGS, "%s in %s is not an energy group\n",
 +                      names[2*i+1], option);
 +        }
 +        if ((j < nr) && (k < nr))
 +        {
 +            ir->opts.egp_flags[nr*j+k] |= flag;
 +            ir->opts.egp_flags[nr*k+j] |= flag;
 +            bSet = TRUE;
 +        }
 +    }
 +
 +    return bSet;
 +}
 +
 +void do_index(const char* mdparin, const char *ndx,
 +              gmx_mtop_t *mtop,
 +              gmx_bool bVerbose,
 +              t_inputrec *ir, rvec *v,
 +              warninp_t wi)
 +{
 +    t_blocka     *grps;
 +    gmx_groups_t *groups;
 +    int           natoms;
 +    t_symtab     *symtab;
 +    t_atoms       atoms_all;
 +    char          warnbuf[STRLEN], **gnames;
 +    int           nr, ntcg, ntau_t, nref_t, nacc, nofg, nSA, nSA_points, nSA_time, nSA_temp;
 +    real          tau_min;
 +    int           nstcmin;
 +    int           nacg, nfreeze, nfrdim, nenergy, nvcm, nuser;
 +    char         *ptr1[MAXPTR], *ptr2[MAXPTR], *ptr3[MAXPTR];
 +    int           i, j, k, restnm;
 +    real          SAtime;
 +    gmx_bool      bExcl, bTable, bSetTCpar, bAnneal, bRest;
 +    int           nQMmethod, nQMbasis, nQMcharge, nQMmult, nbSH, nCASorb, nCASelec,
 +                  nSAon, nSAoff, nSAsteps, nQMg, nbOPT, nbTS;
 +    char          warn_buf[STRLEN];
 +
 +    if (bVerbose)
 +    {
 +        fprintf(stderr, "processing index file...\n");
 +    }
 +    debug_gmx();
 +    if (ndx == NULL)
 +    {
 +        snew(grps, 1);
 +        snew(grps->index, 1);
 +        snew(gnames, 1);
 +        atoms_all = gmx_mtop_global_atoms(mtop);
 +        analyse(&atoms_all, grps, &gnames, FALSE, TRUE);
 +        free_t_atoms(&atoms_all, FALSE);
 +    }
 +    else
 +    {
 +        grps = init_index(ndx, &gnames);
 +    }
 +
 +    groups = &mtop->groups;
 +    natoms = mtop->natoms;
 +    symtab = &mtop->symtab;
 +
 +    snew(groups->grpname, grps->nr+1);
 +
 +    for (i = 0; (i < grps->nr); i++)
 +    {
 +        groups->grpname[i] = put_symtab(symtab, gnames[i]);
 +    }
 +    groups->grpname[i] = put_symtab(symtab, "rest");
 +    restnm             = i;
 +    srenew(gnames, grps->nr+1);
 +    gnames[restnm]   = *(groups->grpname[i]);
 +    groups->ngrpname = grps->nr+1;
 +
 +    set_warning_line(wi, mdparin, -1);
 +
 +    ntau_t = str_nelem(tau_t, MAXPTR, ptr1);
 +    nref_t = str_nelem(ref_t, MAXPTR, ptr2);
 +    ntcg   = str_nelem(tcgrps, MAXPTR, ptr3);
 +    if ((ntau_t != ntcg) || (nref_t != ntcg))
 +    {
 +        gmx_fatal(FARGS, "Invalid T coupling input: %d groups, %d ref-t values and "
 +                  "%d tau-t values", ntcg, nref_t, ntau_t);
 +    }
 +
 +    bSetTCpar = (ir->etc || EI_SD(ir->eI) || ir->eI == eiBD || EI_TPI(ir->eI));
 +    do_numbering(natoms, groups, ntcg, ptr3, grps, gnames, egcTC,
 +                 restnm, bSetTCpar ? egrptpALL : egrptpALL_GENREST, bVerbose, wi);
 +    nr            = groups->grps[egcTC].nr;
 +    ir->opts.ngtc = nr;
 +    snew(ir->opts.nrdf, nr);
 +    snew(ir->opts.tau_t, nr);
 +    snew(ir->opts.ref_t, nr);
 +    if (ir->eI == eiBD && ir->bd_fric == 0)
 +    {
 +        fprintf(stderr, "bd-fric=0, so tau-t will be used as the inverse friction constant(s)\n");
 +    }
 +
 +    if (bSetTCpar)
 +    {
 +        if (nr != nref_t)
 +        {
 +            gmx_fatal(FARGS, "Not enough ref-t and tau-t values!");
 +        }
 +
 +        tau_min = 1e20;
 +        for (i = 0; (i < nr); i++)
 +        {
 +            ir->opts.tau_t[i] = strtod(ptr1[i], NULL);
 +            if ((ir->eI == eiBD || ir->eI == eiSD2) && ir->opts.tau_t[i] <= 0)
 +            {
 +                sprintf(warn_buf, "With integrator %s tau-t should be larger than 0", ei_names[ir->eI]);
 +                warning_error(wi, warn_buf);
 +            }
 +
 +            if (ir->etc != etcVRESCALE && ir->opts.tau_t[i] == 0)
 +            {
 +                warning_note(wi, "tau-t = -1 is the value to signal that a group should not have temperature coupling. Treating your use of tau-t = 0 as if you used -1.");
 +            }
 +
 +            if (ir->opts.tau_t[i] >= 0)
 +            {
 +                tau_min = min(tau_min, ir->opts.tau_t[i]);
 +            }
 +        }
 +        if (ir->etc != etcNO && ir->nsttcouple == -1)
 +        {
 +            ir->nsttcouple = ir_optimal_nsttcouple(ir);
 +        }
 +
 +        if (EI_VV(ir->eI))
 +        {
 +            if ((ir->etc == etcNOSEHOOVER) && (ir->epc == epcBERENDSEN))
 +            {
 +                gmx_fatal(FARGS, "Cannot do Nose-Hoover temperature with Berendsen pressure control with md-vv; use either vrescale temperature with berendsen pressure or Nose-Hoover temperature with MTTK pressure");
 +            }
 +            if ((ir->epc == epcMTTK) && (ir->etc > etcNO))
 +            {
 +                if (ir->nstpcouple != ir->nsttcouple)
 +                {
 +                    int mincouple = min(ir->nstpcouple, ir->nsttcouple);
 +                    ir->nstpcouple = ir->nsttcouple = mincouple;
 +                    sprintf(warn_buf, "for current Trotter decomposition methods with vv, nsttcouple and nstpcouple must be equal.  Both have been reset to min(nsttcouple,nstpcouple) = %d", mincouple);
 +                    warning_note(wi, warn_buf);
 +                }
 +            }
 +        }
 +        /* velocity verlet with averaged kinetic energy KE = 0.5*(v(t+1/2) - v(t-1/2)) is implemented
 +           primarily for testing purposes, and does not work with temperature coupling other than 1 */
 +
 +        if (ETC_ANDERSEN(ir->etc))
 +        {
 +            if (ir->nsttcouple != 1)
 +            {
 +                ir->nsttcouple = 1;
 +                sprintf(warn_buf, "Andersen temperature control methods assume nsttcouple = 1; there is no need for larger nsttcouple > 1, since no global parameters are computed. nsttcouple has been reset to 1");
 +                warning_note(wi, warn_buf);
 +            }
 +        }
 +        nstcmin = tcouple_min_integration_steps(ir->etc);
 +        if (nstcmin > 1)
 +        {
 +            if (tau_min/(ir->delta_t*ir->nsttcouple) < nstcmin)
 +            {
 +                sprintf(warn_buf, "For proper integration of the %s thermostat, tau-t (%g) should be at least %d times larger than nsttcouple*dt (%g)",
 +                        ETCOUPLTYPE(ir->etc),
 +                        tau_min, nstcmin,
 +                        ir->nsttcouple*ir->delta_t);
 +                warning(wi, warn_buf);
 +            }
 +        }
 +        for (i = 0; (i < nr); i++)
 +        {
 +            ir->opts.ref_t[i] = strtod(ptr2[i], NULL);
 +            if (ir->opts.ref_t[i] < 0)
 +            {
 +                gmx_fatal(FARGS, "ref-t for group %d negative", i);
 +            }
 +        }
 +        /* set the lambda mc temperature to the md integrator temperature (which should be defined
 +           if we are in this conditional) if mc_temp is negative */
 +        if (ir->expandedvals->mc_temp < 0)
 +        {
 +            ir->expandedvals->mc_temp = ir->opts.ref_t[0]; /*for now, set to the first reft */
 +        }
 +    }
 +
 +    /* Simulated annealing for each group. There are nr groups */
 +    nSA = str_nelem(anneal, MAXPTR, ptr1);
 +    if (nSA == 1 && (ptr1[0][0] == 'n' || ptr1[0][0] == 'N'))
 +    {
 +        nSA = 0;
 +    }
 +    if (nSA > 0 && nSA != nr)
 +    {
 +        gmx_fatal(FARGS, "Not enough annealing values: %d (for %d groups)\n", nSA, nr);
 +    }
 +    else
 +    {
 +        snew(ir->opts.annealing, nr);
 +        snew(ir->opts.anneal_npoints, nr);
 +        snew(ir->opts.anneal_time, nr);
 +        snew(ir->opts.anneal_temp, nr);
 +        for (i = 0; i < nr; i++)
 +        {
 +            ir->opts.annealing[i]      = eannNO;
 +            ir->opts.anneal_npoints[i] = 0;
 +            ir->opts.anneal_time[i]    = NULL;
 +            ir->opts.anneal_temp[i]    = NULL;
 +        }
 +        if (nSA > 0)
 +        {
 +            bAnneal = FALSE;
 +            for (i = 0; i < nr; i++)
 +            {
 +                if (ptr1[i][0] == 'n' || ptr1[i][0] == 'N')
 +                {
 +                    ir->opts.annealing[i] = eannNO;
 +                }
 +                else if (ptr1[i][0] == 's' || ptr1[i][0] == 'S')
 +                {
 +                    ir->opts.annealing[i] = eannSINGLE;
 +                    bAnneal               = TRUE;
 +                }
 +                else if (ptr1[i][0] == 'p' || ptr1[i][0] == 'P')
 +                {
 +                    ir->opts.annealing[i] = eannPERIODIC;
 +                    bAnneal               = TRUE;
 +                }
 +            }
 +            if (bAnneal)
 +            {
 +                /* Read the other fields too */
 +                nSA_points = str_nelem(anneal_npoints, MAXPTR, ptr1);
 +                if (nSA_points != nSA)
 +                {
 +                    gmx_fatal(FARGS, "Found %d annealing-npoints values for %d groups\n", nSA_points, nSA);
 +                }
 +                for (k = 0, i = 0; i < nr; i++)
 +                {
 +                    ir->opts.anneal_npoints[i] = strtol(ptr1[i], NULL, 10);
 +                    if (ir->opts.anneal_npoints[i] == 1)
 +                    {
 +                        gmx_fatal(FARGS, "Please specify at least a start and an end point for annealing\n");
 +                    }
 +                    snew(ir->opts.anneal_time[i], ir->opts.anneal_npoints[i]);
 +                    snew(ir->opts.anneal_temp[i], ir->opts.anneal_npoints[i]);
 +                    k += ir->opts.anneal_npoints[i];
 +                }
 +
 +                nSA_time = str_nelem(anneal_time, MAXPTR, ptr1);
 +                if (nSA_time != k)
 +                {
 +                    gmx_fatal(FARGS, "Found %d annealing-time values, wanter %d\n", nSA_time, k);
 +                }
 +                nSA_temp = str_nelem(anneal_temp, MAXPTR, ptr2);
 +                if (nSA_temp != k)
 +                {
 +                    gmx_fatal(FARGS, "Found %d annealing-temp values, wanted %d\n", nSA_temp, k);
 +                }
 +
 +                for (i = 0, k = 0; i < nr; i++)
 +                {
 +
 +                    for (j = 0; j < ir->opts.anneal_npoints[i]; j++)
 +                    {
 +                        ir->opts.anneal_time[i][j] = strtod(ptr1[k], NULL);
 +                        ir->opts.anneal_temp[i][j] = strtod(ptr2[k], NULL);
 +                        if (j == 0)
 +                        {
 +                            if (ir->opts.anneal_time[i][0] > (ir->init_t+GMX_REAL_EPS))
 +                            {
 +                                gmx_fatal(FARGS, "First time point for annealing > init_t.\n");
 +                            }
 +                        }
 +                        else
 +                        {
 +                            /* j>0 */
 +                            if (ir->opts.anneal_time[i][j] < ir->opts.anneal_time[i][j-1])
 +                            {
 +                                gmx_fatal(FARGS, "Annealing timepoints out of order: t=%f comes after t=%f\n",
 +                                          ir->opts.anneal_time[i][j], ir->opts.anneal_time[i][j-1]);
 +                            }
 +                        }
 +                        if (ir->opts.anneal_temp[i][j] < 0)
 +                        {
 +                            gmx_fatal(FARGS, "Found negative temperature in annealing: %f\n", ir->opts.anneal_temp[i][j]);
 +                        }
 +                        k++;
 +                    }
 +                }
 +                /* Print out some summary information, to make sure we got it right */
 +                for (i = 0, k = 0; i < nr; i++)
 +                {
 +                    if (ir->opts.annealing[i] != eannNO)
 +                    {
 +                        j = groups->grps[egcTC].nm_ind[i];
 +                        fprintf(stderr, "Simulated annealing for group %s: %s, %d timepoints\n",
 +                                *(groups->grpname[j]), eann_names[ir->opts.annealing[i]],
 +                                ir->opts.anneal_npoints[i]);
 +                        fprintf(stderr, "Time (ps)   Temperature (K)\n");
 +                        /* All terms except the last one */
 +                        for (j = 0; j < (ir->opts.anneal_npoints[i]-1); j++)
 +                        {
 +                            fprintf(stderr, "%9.1f      %5.1f\n", ir->opts.anneal_time[i][j], ir->opts.anneal_temp[i][j]);
 +                        }
 +
 +                        /* Finally the last one */
 +                        j = ir->opts.anneal_npoints[i]-1;
 +                        if (ir->opts.annealing[i] == eannSINGLE)
 +                        {
 +                            fprintf(stderr, "%9.1f-     %5.1f\n", ir->opts.anneal_time[i][j], ir->opts.anneal_temp[i][j]);
 +                        }
 +                        else
 +                        {
 +                            fprintf(stderr, "%9.1f      %5.1f\n", ir->opts.anneal_time[i][j], ir->opts.anneal_temp[i][j]);
 +                            if (fabs(ir->opts.anneal_temp[i][j]-ir->opts.anneal_temp[i][0]) > GMX_REAL_EPS)
 +                            {
 +                                warning_note(wi, "There is a temperature jump when your annealing loops back.\n");
 +                            }
 +                        }
 +                    }
 +                }
 +            }
 +        }
 +    }
 +
 +    if (ir->ePull != epullNO)
 +    {
 +        make_pull_groups(ir->pull, pull_grp, grps, gnames);
 +    }
 +
 +    if (ir->bRot)
 +    {
 +        make_rotation_groups(ir->rot, rot_grp, grps, gnames);
 +    }
 +
 +    nacc = str_nelem(acc, MAXPTR, ptr1);
 +    nacg = str_nelem(accgrps, MAXPTR, ptr2);
 +    if (nacg*DIM != nacc)
 +    {
 +        gmx_fatal(FARGS, "Invalid Acceleration input: %d groups and %d acc. values",
 +                  nacg, nacc);
 +    }
 +    do_numbering(natoms, groups, nacg, ptr2, grps, gnames, egcACC,
 +                 restnm, egrptpALL_GENREST, bVerbose, wi);
 +    nr = groups->grps[egcACC].nr;
 +    snew(ir->opts.acc, nr);
 +    ir->opts.ngacc = nr;
 +
 +    for (i = k = 0; (i < nacg); i++)
 +    {
 +        for (j = 0; (j < DIM); j++, k++)
 +        {
 +            ir->opts.acc[i][j] = strtod(ptr1[k], NULL);
 +        }
 +    }
 +    for (; (i < nr); i++)
 +    {
 +        for (j = 0; (j < DIM); j++)
 +        {
 +            ir->opts.acc[i][j] = 0;
 +        }
 +    }
 +
 +    nfrdim  = str_nelem(frdim, MAXPTR, ptr1);
 +    nfreeze = str_nelem(freeze, MAXPTR, ptr2);
 +    if (nfrdim != DIM*nfreeze)
 +    {
 +        gmx_fatal(FARGS, "Invalid Freezing input: %d groups and %d freeze values",
 +                  nfreeze, nfrdim);
 +    }
 +    do_numbering(natoms, groups, nfreeze, ptr2, grps, gnames, egcFREEZE,
 +                 restnm, egrptpALL_GENREST, bVerbose, wi);
 +    nr             = groups->grps[egcFREEZE].nr;
 +    ir->opts.ngfrz = nr;
 +    snew(ir->opts.nFreeze, nr);
 +    for (i = k = 0; (i < nfreeze); i++)
 +    {
 +        for (j = 0; (j < DIM); j++, k++)
 +        {
 +            ir->opts.nFreeze[i][j] = (gmx_strncasecmp(ptr1[k], "Y", 1) == 0);
 +            if (!ir->opts.nFreeze[i][j])
 +            {
 +                if (gmx_strncasecmp(ptr1[k], "N", 1) != 0)
 +                {
 +                    sprintf(warnbuf, "Please use Y(ES) or N(O) for freezedim only "
 +                            "(not %s)", ptr1[k]);
 +                    warning(wi, warn_buf);
 +                }
 +            }
 +        }
 +    }
 +    for (; (i < nr); i++)
 +    {
 +        for (j = 0; (j < DIM); j++)
 +        {
 +            ir->opts.nFreeze[i][j] = 0;
 +        }
 +    }
 +
 +    nenergy = str_nelem(energy, MAXPTR, ptr1);
 +    do_numbering(natoms, groups, nenergy, ptr1, grps, gnames, egcENER,
 +                 restnm, egrptpALL_GENREST, bVerbose, wi);
 +    add_wall_energrps(groups, ir->nwall, symtab);
 +    ir->opts.ngener = groups->grps[egcENER].nr;
 +    nvcm            = str_nelem(vcm, MAXPTR, ptr1);
 +    bRest           =
 +        do_numbering(natoms, groups, nvcm, ptr1, grps, gnames, egcVCM,
 +                     restnm, nvcm == 0 ? egrptpALL_GENREST : egrptpPART, bVerbose, wi);
 +    if (bRest)
 +    {
 +        warning(wi, "Some atoms are not part of any center of mass motion removal group.\n"
 +                "This may lead to artifacts.\n"
 +                "In most cases one should use one group for the whole system.");
 +    }
 +
 +    /* Now we have filled the freeze struct, so we can calculate NRDF */
 +    calc_nrdf(mtop, ir, gnames);
 +
 +    if (v && NULL)
 +    {
 +        real fac, ntot = 0;
 +
 +        /* Must check per group! */
 +        for (i = 0; (i < ir->opts.ngtc); i++)
 +        {
 +            ntot += ir->opts.nrdf[i];
 +        }
 +        if (ntot != (DIM*natoms))
 +        {
 +            fac = sqrt(ntot/(DIM*natoms));
 +            if (bVerbose)
 +            {
 +                fprintf(stderr, "Scaling velocities by a factor of %.3f to account for constraints\n"
 +                        "and removal of center of mass motion\n", fac);
 +            }
 +            for (i = 0; (i < natoms); i++)
 +            {
 +                svmul(fac, v[i], v[i]);
 +            }
 +        }
 +    }
 +
 +    nuser = str_nelem(user1, MAXPTR, ptr1);
 +    do_numbering(natoms, groups, nuser, ptr1, grps, gnames, egcUser1,
 +                 restnm, egrptpALL_GENREST, bVerbose, wi);
 +    nuser = str_nelem(user2, MAXPTR, ptr1);
 +    do_numbering(natoms, groups, nuser, ptr1, grps, gnames, egcUser2,
 +                 restnm, egrptpALL_GENREST, bVerbose, wi);
 +    nuser = str_nelem(xtc_grps, MAXPTR, ptr1);
 +    do_numbering(natoms, groups, nuser, ptr1, grps, gnames, egcXTC,
 +                 restnm, egrptpONE, bVerbose, wi);
 +    nofg = str_nelem(orirefitgrp, MAXPTR, ptr1);
 +    do_numbering(natoms, groups, nofg, ptr1, grps, gnames, egcORFIT,
 +                 restnm, egrptpALL_GENREST, bVerbose, wi);
 +
 +    /* QMMM input processing */
 +    nQMg          = str_nelem(QMMM, MAXPTR, ptr1);
 +    nQMmethod     = str_nelem(QMmethod, MAXPTR, ptr2);
 +    nQMbasis      = str_nelem(QMbasis, MAXPTR, ptr3);
 +    if ((nQMmethod != nQMg) || (nQMbasis != nQMg))
 +    {
 +        gmx_fatal(FARGS, "Invalid QMMM input: %d groups %d basissets"
 +                  " and %d methods\n", nQMg, nQMbasis, nQMmethod);
 +    }
 +    /* group rest, if any, is always MM! */
 +    do_numbering(natoms, groups, nQMg, ptr1, grps, gnames, egcQMMM,
 +                 restnm, egrptpALL_GENREST, bVerbose, wi);
 +    nr            = nQMg; /*atoms->grps[egcQMMM].nr;*/
 +    ir->opts.ngQM = nQMg;
 +    snew(ir->opts.QMmethod, nr);
 +    snew(ir->opts.QMbasis, nr);
 +    for (i = 0; i < nr; i++)
 +    {
 +        /* input consists of strings: RHF CASSCF PM3 .. These need to be
 +         * converted to the corresponding enum in names.c
 +         */
 +        ir->opts.QMmethod[i] = search_QMstring(ptr2[i], eQMmethodNR,
 +                                               eQMmethod_names);
 +        ir->opts.QMbasis[i]  = search_QMstring(ptr3[i], eQMbasisNR,
 +                                               eQMbasis_names);
 +
 +    }
 +    nQMmult   = str_nelem(QMmult, MAXPTR, ptr1);
 +    nQMcharge = str_nelem(QMcharge, MAXPTR, ptr2);
 +    nbSH      = str_nelem(bSH, MAXPTR, ptr3);
 +    snew(ir->opts.QMmult, nr);
 +    snew(ir->opts.QMcharge, nr);
 +    snew(ir->opts.bSH, nr);
 +
 +    for (i = 0; i < nr; i++)
 +    {
 +        ir->opts.QMmult[i]   = strtol(ptr1[i], NULL, 10);
 +        ir->opts.QMcharge[i] = strtol(ptr2[i], NULL, 10);
 +        ir->opts.bSH[i]      = (gmx_strncasecmp(ptr3[i], "Y", 1) == 0);
 +    }
 +
 +    nCASelec  = str_nelem(CASelectrons, MAXPTR, ptr1);
 +    nCASorb   = str_nelem(CASorbitals, MAXPTR, ptr2);
 +    snew(ir->opts.CASelectrons, nr);
 +    snew(ir->opts.CASorbitals, nr);
 +    for (i = 0; i < nr; i++)
 +    {
 +        ir->opts.CASelectrons[i] = strtol(ptr1[i], NULL, 10);
 +        ir->opts.CASorbitals[i]  = strtol(ptr2[i], NULL, 10);
 +    }
 +    /* special optimization options */
 +
 +    nbOPT = str_nelem(bOPT, MAXPTR, ptr1);
 +    nbTS  = str_nelem(bTS, MAXPTR, ptr2);
 +    snew(ir->opts.bOPT, nr);
 +    snew(ir->opts.bTS, nr);
 +    for (i = 0; i < nr; i++)
 +    {
 +        ir->opts.bOPT[i] = (gmx_strncasecmp(ptr1[i], "Y", 1) == 0);
 +        ir->opts.bTS[i]  = (gmx_strncasecmp(ptr2[i], "Y", 1) == 0);
 +    }
 +    nSAon     = str_nelem(SAon, MAXPTR, ptr1);
 +    nSAoff    = str_nelem(SAoff, MAXPTR, ptr2);
 +    nSAsteps  = str_nelem(SAsteps, MAXPTR, ptr3);
 +    snew(ir->opts.SAon, nr);
 +    snew(ir->opts.SAoff, nr);
 +    snew(ir->opts.SAsteps, nr);
 +
 +    for (i = 0; i < nr; i++)
 +    {
 +        ir->opts.SAon[i]    = strtod(ptr1[i], NULL);
 +        ir->opts.SAoff[i]   = strtod(ptr2[i], NULL);
 +        ir->opts.SAsteps[i] = strtol(ptr3[i], NULL, 10);
 +    }
 +    /* end of QMMM input */
 +
 +    if (bVerbose)
 +    {
 +        for (i = 0; (i < egcNR); i++)
 +        {
 +            fprintf(stderr, "%-16s has %d element(s):", gtypes[i], groups->grps[i].nr);
 +            for (j = 0; (j < groups->grps[i].nr); j++)
 +            {
 +                fprintf(stderr, " %s", *(groups->grpname[groups->grps[i].nm_ind[j]]));
 +            }
 +            fprintf(stderr, "\n");
 +        }
 +    }
 +
 +    nr = groups->grps[egcENER].nr;
 +    snew(ir->opts.egp_flags, nr*nr);
 +
 +    bExcl = do_egp_flag(ir, groups, "energygrp-excl", egpexcl, EGP_EXCL);
 +    if (bExcl && ir->cutoff_scheme == ecutsVERLET)
 +    {
 +        warning_error(wi, "Energy group exclusions are not (yet) implemented for the Verlet scheme");
 +    }
 +    if (bExcl && EEL_FULL(ir->coulombtype))
 +    {
 +        warning(wi, "Can not exclude the lattice Coulomb energy between energy groups");
 +    }
 +
 +    bTable = do_egp_flag(ir, groups, "energygrp-table", egptable, EGP_TABLE);
 +    if (bTable && !(ir->vdwtype == evdwUSER) &&
 +        !(ir->coulombtype == eelUSER) && !(ir->coulombtype == eelPMEUSER) &&
 +        !(ir->coulombtype == eelPMEUSERSWITCH))
 +    {
 +        gmx_fatal(FARGS, "Can only have energy group pair tables in combination with user tables for VdW and/or Coulomb");
 +    }
 +
 +    decode_cos(efield_x, &(ir->ex[XX]), FALSE);
 +    decode_cos(efield_xt, &(ir->et[XX]), TRUE);
 +    decode_cos(efield_y, &(ir->ex[YY]), FALSE);
 +    decode_cos(efield_yt, &(ir->et[YY]), TRUE);
 +    decode_cos(efield_z, &(ir->ex[ZZ]), FALSE);
 +    decode_cos(efield_zt, &(ir->et[ZZ]), TRUE);
 +
 +    if (ir->bAdress)
 +    {
 +        do_adress_index(ir->adress, groups, gnames, &(ir->opts), wi);
 +    }
 +
 +    for (i = 0; (i < grps->nr); i++)
 +    {
 +        sfree(gnames[i]);
 +    }
 +    sfree(gnames);
 +    done_blocka(grps);
 +    sfree(grps);
 +
 +}
 +
 +
 +
 +static void check_disre(gmx_mtop_t *mtop)
 +{
 +    gmx_ffparams_t *ffparams;
 +    t_functype     *functype;
 +    t_iparams      *ip;
 +    int             i, ndouble, ftype;
 +    int             label, old_label;
 +
 +    if (gmx_mtop_ftype_count(mtop, F_DISRES) > 0)
 +    {
 +        ffparams  = &mtop->ffparams;
 +        functype  = ffparams->functype;
 +        ip        = ffparams->iparams;
 +        ndouble   = 0;
 +        old_label = -1;
 +        for (i = 0; i < ffparams->ntypes; i++)
 +        {
 +            ftype = functype[i];
 +            if (ftype == F_DISRES)
 +            {
 +                label = ip[i].disres.label;
 +                if (label == old_label)
 +                {
 +                    fprintf(stderr, "Distance restraint index %d occurs twice\n", label);
 +                    ndouble++;
 +                }
 +                old_label = label;
 +            }
 +        }
 +        if (ndouble > 0)
 +        {
 +            gmx_fatal(FARGS, "Found %d double distance restraint indices,\n"
 +                      "probably the parameters for multiple pairs in one restraint "
 +                      "are not identical\n", ndouble);
 +        }
 +    }
 +}
 +
 +static gmx_bool absolute_reference(t_inputrec *ir, gmx_mtop_t *sys,
 +                                   gmx_bool posres_only,
 +                                   ivec AbsRef)
 +{
 +    int                  d, g, i;
 +    gmx_mtop_ilistloop_t iloop;
 +    t_ilist             *ilist;
 +    int                  nmol;
 +    t_iparams           *pr;
 +
 +    clear_ivec(AbsRef);
 +
 +    if (!posres_only)
 +    {
 +        /* Check the COM */
 +        for (d = 0; d < DIM; d++)
 +        {
 +            AbsRef[d] = (d < ndof_com(ir) ? 0 : 1);
 +        }
 +        /* Check for freeze groups */
 +        for (g = 0; g < ir->opts.ngfrz; g++)
 +        {
 +            for (d = 0; d < DIM; d++)
 +            {
 +                if (ir->opts.nFreeze[g][d] != 0)
 +                {
 +                    AbsRef[d] = 1;
 +                }
 +            }
 +        }
 +    }
 +
 +    /* Check for position restraints */
 +    iloop = gmx_mtop_ilistloop_init(sys);
 +    while (gmx_mtop_ilistloop_next(iloop, &ilist, &nmol))
 +    {
 +        if (nmol > 0 &&
 +            (AbsRef[XX] == 0 || AbsRef[YY] == 0 || AbsRef[ZZ] == 0))
 +        {
 +            for (i = 0; i < ilist[F_POSRES].nr; i += 2)
 +            {
 +                pr = &sys->ffparams.iparams[ilist[F_POSRES].iatoms[i]];
 +                for (d = 0; d < DIM; d++)
 +                {
 +                    if (pr->posres.fcA[d] != 0)
 +                    {
 +                        AbsRef[d] = 1;
 +                    }
 +                }
 +            }
 +            for (i = 0; i < ilist[F_FBPOSRES].nr; i += 2)
 +            {
 +                /* Check for flat-bottom posres */
 +                pr = &sys->ffparams.iparams[ilist[F_FBPOSRES].iatoms[i]];
 +                if (pr->fbposres.k != 0)
 +                {
 +                    switch (pr->fbposres.geom)
 +                    {
 +                        case efbposresSPHERE:
 +                            AbsRef[XX] = AbsRef[YY] = AbsRef[ZZ] = 1;
 +                            break;
 +                        case efbposresCYLINDER:
 +                            AbsRef[XX] = AbsRef[YY] = 1;
 +                            break;
 +                        case efbposresX: /* d=XX */
 +                        case efbposresY: /* d=YY */
 +                        case efbposresZ: /* d=ZZ */
 +                            d         = pr->fbposres.geom - efbposresX;
 +                            AbsRef[d] = 1;
 +                            break;
 +                        default:
 +                            gmx_fatal(FARGS, " Invalid geometry for flat-bottom position restraint.\n"
 +                                      "Expected nr between 1 and %d. Found %d\n", efbposresNR-1,
 +                                      pr->fbposres.geom);
 +                    }
 +                }
 +            }
 +        }
 +    }
 +
 +    return (AbsRef[XX] != 0 && AbsRef[YY] != 0 && AbsRef[ZZ] != 0);
 +}
 +
 +void triple_check(const char *mdparin, t_inputrec *ir, gmx_mtop_t *sys,
 +                  warninp_t wi)
 +{
 +    char                      err_buf[256];
 +    int                       i, m, g, nmol, npct;
 +    gmx_bool                  bCharge, bAcc;
 +    real                      gdt_max, *mgrp, mt;
 +    rvec                      acc;
 +    gmx_mtop_atomloop_block_t aloopb;
 +    gmx_mtop_atomloop_all_t   aloop;
 +    t_atom                   *atom;
 +    ivec                      AbsRef;
 +    char                      warn_buf[STRLEN];
 +
 +    set_warning_line(wi, mdparin, -1);
 +
 +    if (EI_DYNAMICS(ir->eI) && !EI_SD(ir->eI) && ir->eI != eiBD &&
 +        ir->comm_mode == ecmNO &&
 +        !(absolute_reference(ir, sys, FALSE, AbsRef) || ir->nsteps <= 10))
 +    {
 +        warning(wi, "You are not using center of mass motion removal (mdp option comm-mode), numerical rounding errors can lead to build up of kinetic energy of the center of mass");
 +    }
 +
 +    /* Check for pressure coupling with absolute position restraints */
 +    if (ir->epc != epcNO && ir->refcoord_scaling == erscNO)
 +    {
 +        absolute_reference(ir, sys, TRUE, AbsRef);
 +        {
 +            for (m = 0; m < DIM; m++)
 +            {
 +                if (AbsRef[m] && norm2(ir->compress[m]) > 0)
 +                {
 +                    warning(wi, "You are using pressure coupling with absolute position restraints, this will give artifacts. Use the refcoord_scaling option.");
 +                    break;
 +                }
 +            }
 +        }
 +    }
 +
 +    bCharge = FALSE;
 +    aloopb  = gmx_mtop_atomloop_block_init(sys);
 +    while (gmx_mtop_atomloop_block_next(aloopb, &atom, &nmol))
 +    {
 +        if (atom->q != 0 || atom->qB != 0)
 +        {
 +            bCharge = TRUE;
 +        }
 +    }
 +
 +    if (!bCharge)
 +    {
 +        if (EEL_FULL(ir->coulombtype))
 +        {
 +            sprintf(err_buf,
 +                    "You are using full electrostatics treatment %s for a system without charges.\n"
 +                    "This costs a lot of performance for just processing zeros, consider using %s instead.\n",
 +                    EELTYPE(ir->coulombtype), EELTYPE(eelCUT));
 +            warning(wi, err_buf);
 +        }
 +    }
 +    else
 +    {
 +        if (ir->coulombtype == eelCUT && ir->rcoulomb > 0 && !ir->implicit_solvent)
 +        {
 +            sprintf(err_buf,
 +                    "You are using a plain Coulomb cut-off, which might produce artifacts.\n"
 +                    "You might want to consider using %s electrostatics.\n",
 +                    EELTYPE(eelPME));
 +            warning_note(wi, err_buf);
 +        }
 +    }
 +
 +    /* Generalized reaction field */
 +    if (ir->opts.ngtc == 0)
 +    {
 +        sprintf(err_buf, "No temperature coupling while using coulombtype %s",
 +                eel_names[eelGRF]);
 +        CHECK(ir->coulombtype == eelGRF);
 +    }
 +    else
 +    {
 +        sprintf(err_buf, "When using coulombtype = %s"
 +                " ref-t for temperature coupling should be > 0",
 +                eel_names[eelGRF]);
 +        CHECK((ir->coulombtype == eelGRF) && (ir->opts.ref_t[0] <= 0));
 +    }
 +
 +    if (ir->eI == eiSD1 &&
 +        (gmx_mtop_ftype_count(sys, F_CONSTR) > 0 ||
 +         gmx_mtop_ftype_count(sys, F_SETTLE) > 0))
 +    {
 +        sprintf(warn_buf, "With constraints integrator %s is less accurate, consider using %s instead", ei_names[ir->eI], ei_names[eiSD2]);
 +        warning_note(wi, warn_buf);
 +    }
 +
 +    bAcc = FALSE;
 +    for (i = 0; (i < sys->groups.grps[egcACC].nr); i++)
 +    {
 +        for (m = 0; (m < DIM); m++)
 +        {
 +            if (fabs(ir->opts.acc[i][m]) > 1e-6)
 +            {
 +                bAcc = TRUE;
 +            }
 +        }
 +    }
 +    if (bAcc)
 +    {
 +        clear_rvec(acc);
 +        snew(mgrp, sys->groups.grps[egcACC].nr);
 +        aloop = gmx_mtop_atomloop_all_init(sys);
 +        while (gmx_mtop_atomloop_all_next(aloop, &i, &atom))
 +        {
 +            mgrp[ggrpnr(&sys->groups, egcACC, i)] += atom->m;
 +        }
 +        mt = 0.0;
 +        for (i = 0; (i < sys->groups.grps[egcACC].nr); i++)
 +        {
 +            for (m = 0; (m < DIM); m++)
 +            {
 +                acc[m] += ir->opts.acc[i][m]*mgrp[i];
 +            }
 +            mt += mgrp[i];
 +        }
 +        for (m = 0; (m < DIM); m++)
 +        {
 +            if (fabs(acc[m]) > 1e-6)
 +            {
 +                const char *dim[DIM] = { "X", "Y", "Z" };
 +                fprintf(stderr,
 +                        "Net Acceleration in %s direction, will %s be corrected\n",
 +                        dim[m], ir->nstcomm != 0 ? "" : "not");
 +                if (ir->nstcomm != 0 && m < ndof_com(ir))
 +                {
 +                    acc[m] /= mt;
 +                    for (i = 0; (i < sys->groups.grps[egcACC].nr); i++)
 +                    {
 +                        ir->opts.acc[i][m] -= acc[m];
 +                    }
 +                }
 +            }
 +        }
 +        sfree(mgrp);
 +    }
 +
 +    if (ir->efep != efepNO && ir->fepvals->sc_alpha != 0 &&
 +        !gmx_within_tol(sys->ffparams.reppow, 12.0, 10*GMX_DOUBLE_EPS))
 +    {
 +        gmx_fatal(FARGS, "Soft-core interactions are only supported with VdW repulsion power 12");
 +    }
 +
 +    if (ir->ePull != epullNO)
 +    {
 +        if (ir->pull->grp[0].nat == 0)
 +        {
 +            absolute_reference(ir, sys, FALSE, AbsRef);
 +            for (m = 0; m < DIM; m++)
 +            {
 +                if (ir->pull->dim[m] && !AbsRef[m])
 +                {
 +                    warning(wi, "You are using an absolute reference for pulling, but the rest of the system does not have an absolute reference. This will lead to artifacts.");
 +                    break;
 +                }
 +            }
 +        }
 +
 +        if (ir->pull->eGeom == epullgDIRPBC)
 +        {
 +            for (i = 0; i < 3; i++)
 +            {
 +                for (m = 0; m <= i; m++)
 +                {
 +                    if ((ir->epc != epcNO && ir->compress[i][m] != 0) ||
 +                        ir->deform[i][m] != 0)
 +                    {
 +                        for (g = 1; g < ir->pull->ngrp; g++)
 +                        {
 +                            if (ir->pull->grp[g].vec[m] != 0)
 +                            {
 +                                gmx_fatal(FARGS, "Can not have dynamic box while using pull geometry '%s' (dim %c)", EPULLGEOM(ir->pull->eGeom), 'x'+m);
 +                            }
 +                        }
 +                    }
 +                }
 +            }
 +        }
 +    }
 +
 +    check_disre(sys);
 +}
 +
 +void double_check(t_inputrec *ir, matrix box, gmx_bool bConstr, warninp_t wi)
 +{
 +    real        min_size;
 +    gmx_bool    bTWIN;
 +    char        warn_buf[STRLEN];
 +    const char *ptr;
 +
 +    ptr = check_box(ir->ePBC, box);
 +    if (ptr)
 +    {
 +        warning_error(wi, ptr);
 +    }
 +
 +    if (bConstr && ir->eConstrAlg == econtSHAKE)
 +    {
 +        if (ir->shake_tol <= 0.0)
 +        {
 +            sprintf(warn_buf, "ERROR: shake-tol must be > 0 instead of %g\n",
 +                    ir->shake_tol);
 +            warning_error(wi, warn_buf);
 +        }
 +
 +        if (IR_TWINRANGE(*ir) && ir->nstlist > 1)
 +        {
 +            sprintf(warn_buf, "With twin-range cut-off's and SHAKE the virial and the pressure are incorrect.");
 +            if (ir->epc == epcNO)
 +            {
 +                warning(wi, warn_buf);
 +            }
 +            else
 +            {
 +                warning_error(wi, warn_buf);
 +            }
 +        }
 +    }
 +
 +    if ( (ir->eConstrAlg == econtLINCS) && bConstr)
 +    {
 +        /* If we have Lincs constraints: */
 +        if (ir->eI == eiMD && ir->etc == etcNO &&
 +            ir->eConstrAlg == econtLINCS && ir->nLincsIter == 1)
 +        {
 +            sprintf(warn_buf, "For energy conservation with LINCS, lincs_iter should be 2 or larger.\n");
 +            warning_note(wi, warn_buf);
 +        }
 +
 +        if ((ir->eI == eiCG || ir->eI == eiLBFGS) && (ir->nProjOrder < 8))
 +        {
 +            sprintf(warn_buf, "For accurate %s with LINCS constraints, lincs-order should be 8 or more.", ei_names[ir->eI]);
 +            warning_note(wi, warn_buf);
 +        }
 +        if (ir->epc == epcMTTK)
 +        {
 +            warning_error(wi, "MTTK not compatible with lincs -- use shake instead.");
 +        }
 +    }
 +
 +    if (ir->LincsWarnAngle > 90.0)
 +    {
 +        sprintf(warn_buf, "lincs-warnangle can not be larger than 90 degrees, setting it to 90.\n");
 +        warning(wi, warn_buf);
 +        ir->LincsWarnAngle = 90.0;
 +    }
 +
 +    if (ir->ePBC != epbcNONE)
 +    {
 +        if (ir->nstlist == 0)
 +        {
 +            warning(wi, "With nstlist=0 atoms are only put into the box at step 0, therefore drifting atoms might cause the simulation to crash.");
 +        }
 +        bTWIN = (ir->rlistlong > ir->rlist);
 +        if (ir->ns_type == ensGRID)
 +        {
 +            if (sqr(ir->rlistlong) >= max_cutoff2(ir->ePBC, box))
 +            {
 +                sprintf(warn_buf, "ERROR: The cut-off length is longer than half the shortest box vector or longer than the smallest box diagonal element. Increase the box size or decrease %s.\n",
 +                        bTWIN ? (ir->rcoulomb == ir->rlistlong ? "rcoulomb" : "rvdw") : "rlist");
 +                warning_error(wi, warn_buf);
 +            }
 +        }
 +        else
 +        {
 +            min_size = min(box[XX][XX], min(box[YY][YY], box[ZZ][ZZ]));
 +            if (2*ir->rlistlong >= min_size)
 +            {
 +                sprintf(warn_buf, "ERROR: One of the box lengths is smaller than twice the cut-off length. Increase the box size or decrease rlist.");
 +                warning_error(wi, warn_buf);
 +                if (TRICLINIC(box))
 +                {
 +                    fprintf(stderr, "Grid search might allow larger cut-off's than simple search with triclinic boxes.");
 +                }
 +            }
 +        }
 +    }
 +}
 +
 +void check_chargegroup_radii(const gmx_mtop_t *mtop, const t_inputrec *ir,
 +                             rvec *x,
 +                             warninp_t wi)
 +{
 +    real rvdw1, rvdw2, rcoul1, rcoul2;
 +    char warn_buf[STRLEN];
 +
 +    calc_chargegroup_radii(mtop, x, &rvdw1, &rvdw2, &rcoul1, &rcoul2);
 +
 +    if (rvdw1 > 0)
 +    {
 +        printf("Largest charge group radii for Van der Waals: %5.3f, %5.3f nm\n",
 +               rvdw1, rvdw2);
 +    }
 +    if (rcoul1 > 0)
 +    {
 +        printf("Largest charge group radii for Coulomb:       %5.3f, %5.3f nm\n",
 +               rcoul1, rcoul2);
 +    }
 +
 +    if (ir->rlist > 0)
 +    {
 +        if (rvdw1  + rvdw2  > ir->rlist ||
 +            rcoul1 + rcoul2 > ir->rlist)
 +        {
 +            sprintf(warn_buf, "The sum of the two largest charge group radii (%f) is larger than rlist (%f)\n", max(rvdw1+rvdw2, rcoul1+rcoul2), ir->rlist);
 +            warning(wi, warn_buf);
 +        }
 +        else
 +        {
 +            /* Here we do not use the zero at cut-off macro,
 +             * since user defined interactions might purposely
 +             * not be zero at the cut-off.
 +             */
 +            if (EVDW_IS_ZERO_AT_CUTOFF(ir->vdwtype) &&
 +                rvdw1 + rvdw2 > ir->rlist - ir->rvdw)
 +            {
 +                sprintf(warn_buf, "The sum of the two largest charge group radii (%f) is larger than rlist (%f) - rvdw (%f)\n",
 +                        rvdw1+rvdw2,
 +                        ir->rlist, ir->rvdw);
 +                if (ir_NVE(ir))
 +                {
 +                    warning(wi, warn_buf);
 +                }
 +                else
 +                {
 +                    warning_note(wi, warn_buf);
 +                }
 +            }
 +            if (EEL_IS_ZERO_AT_CUTOFF(ir->coulombtype) &&
 +                rcoul1 + rcoul2 > ir->rlistlong - ir->rcoulomb)
 +            {
 +                sprintf(warn_buf, "The sum of the two largest charge group radii (%f) is larger than %s (%f) - rcoulomb (%f)\n",
 +                        rcoul1+rcoul2,
 +                        ir->rlistlong > ir->rlist ? "rlistlong" : "rlist",
 +                        ir->rlistlong, ir->rcoulomb);
 +                if (ir_NVE(ir))
 +                {
 +                    warning(wi, warn_buf);
 +                }
 +                else
 +                {
 +                    warning_note(wi, warn_buf);
 +                }
 +            }
 +        }
 +    }
 +}
index 4d900df719fcb94d49632c8726e0ca6e380e1e3c,0000000000000000000000000000000000000000..a83b9d876e6a0d58e8f6b2565c52c5a55a7725f8
mode 100644,000000..100644
--- /dev/null
@@@ -1,333 -1,0 +1,333 @@@
-  * architecture independent SIMD intrinsics code.
 +/*
 + * This file is part of the GROMACS molecular simulation package.
 + *
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2012, The GROMACS Development Team
 + * Copyright (c) 2012, by the GROMACS development team, led by
 + * David van der Spoel, Berk Hess, Erik Lindahl, and including many
 + * others, as listed in the AUTHORS file in the top-level source
 + * directory and at http://www.gromacs.org.
 + *
 + * GROMACS is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU Lesser General Public License
 + * as published by the Free Software Foundation; either version 2.1
 + * of the License, or (at your option) any later version.
 + *
 + * GROMACS is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 + * Lesser General Public License for more details.
 + *
 + * You should have received a copy of the GNU Lesser General Public
 + * License along with GROMACS; if not, see
 + * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
 + *
 + * If you want to redistribute modifications to GROMACS, please
 + * consider that scientific software is very special. Version
 + * control is crucial - bugs must be traceable. We will be happy to
 + * consider code for inclusion in the official distribution, but
 + * derived work must not be called official GROMACS. Details are found
 + * in the README & COPYING files - if they are missing, get the
 + * official version at http://www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the research papers on the package. Check out http://www.gromacs.org.
 + */
 +
 +/* The macros in this file are intended to be used for writing
- /* NOTE: floor and blendv are NOT available with SSE2 only acceleration */
++ * architecture-independent SIMD intrinsics code.
 + * To support a new architecture, adding macros here should be (nearly)
 + * all that is needed.
 + */
 +
 +/* Undefine all defines used below so we can include this file multiple times
 + * with different settings from the same source file.
 + */
 +
++/* NOTE: SSE2 acceleration does not include floor or blendv */
 +
 +#undef GMX_SIMD_WIDTH_HERE
 +
 +#undef gmx_epi32
 +
 +/* float/double SIMD register type */
 +#undef gmx_mm_pr
 +
 +#undef gmx_load_pr
 +#undef gmx_load1_pr
 +#undef gmx_set1_pr
 +#undef gmx_setzero_pr
 +#undef gmx_store_pr
 +/* Only used for debugging */
 +#undef gmx_storeu_pr
 +
 +#undef gmx_add_pr
 +#undef gmx_sub_pr
 +#undef gmx_mul_pr
 +#undef gmx_max_pr
 +#undef gmx_cmplt_pr
 +#undef gmx_and_pr
 +#undef gmx_or_pr
 +#undef gmx_andnot_pr
 +
 +/* Only used to speed up the nbnxn tabulated PME kernels */
 +#undef gmx_floor_pr
 +/* Only used with x86 when blendv is faster than comparison */
 +#undef gmx_blendv_pr
 +
 +#undef gmx_movemask_pr
 +
 +/* Integer casts are only used for nbnxn x86 exclusion masks */
 +#undef gmx_mm_castsi128_pr
 +#undef gmx_mm_castsi256_pr
 +
 +/* Conversions only used for nbnxn x86 exclusion masks and PME table lookup */
 +#undef gmx_cvttpr_epi32
 +#undef gmx_cvtepi32_pr
 +
 +#undef gmx_invsqrt_pr
 +#undef gmx_calc_rsq_pr
 +#undef gmx_sum4_pr
 +
 +/* Only required for nbnxn analytical PME kernels */
 +#undef gmx_pmecorrF_pr
 +#undef gmx_pmecorrV_pr
 +
 +
 +/* Half SIMD-width types and operations only for nbnxn 2xnn search+kernels */
 +#undef gmx_mm_hpr
 +
 +#undef gmx_load_hpr
 +#undef gmx_load1_hpr
 +#undef gmx_store_hpr
 +#undef gmx_add_hpr
 +#undef gmx_sub_hpr
 +
 +#undef gmx_sum4_hpr
 +
 +#undef gmx_2hpr_to_pr
 +
 +
 +/* By defining GMX_MM128_HERE or GMX_MM256_HERE before including this file
 + * the same intrinsics, with defines, can be compiled for either 128 or 256
 + * bit wide SSE or AVX instructions.
 + * The gmx_ prefix is replaced by _mm_ or _mm256_ (SSE or AVX).
 + * The _pr suffix is replaced by _ps or _pd (single or double precision).
 + * Note that compiler settings will decide if 128-bit intrinsics will
 + * be translated into SSE or AVX instructions.
 + */
 +
 +#if !defined GMX_MM128_HERE && !defined GMX_MM256_HERE
 +#error "You should define GMX_MM128_HERE or GMX_MM256_HERE"
 +#endif /* no SIMD width selected */
 +
 +#if defined GMX_MM128_HERE && defined GMX_MM256_HERE
 +#error "You should not define both GMX_MM128_HERE and GMX_MM256_HERE"
 +#endif /* both SIMD widths selected */
 +
 +
 +#ifdef GMX_X86_SSE2
 +
 +#ifdef GMX_MM128_HERE /* map the gmx_*_pr macros onto 128-bit _mm_* intrinsics */
 +
 +#define gmx_epi32  __m128i
 +
 +#ifndef GMX_DOUBLE /* single precision: _pr -> _ps */
 +
 +#include "gmx_x86_simd_single.h"
 +
 +#define GMX_SIMD_WIDTH_HERE  4
 +
 +#define gmx_mm_pr  __m128
 +
 +#define gmx_load_pr       _mm_load_ps
 +#define gmx_load1_pr      _mm_load1_ps
 +#define gmx_set1_pr       _mm_set1_ps
 +#define gmx_setzero_pr    _mm_setzero_ps
 +#define gmx_store_pr      _mm_store_ps
 +#define gmx_storeu_pr     _mm_storeu_ps
 +
 +#define gmx_add_pr        _mm_add_ps
 +#define gmx_sub_pr        _mm_sub_ps
 +#define gmx_mul_pr        _mm_mul_ps
 +#define gmx_max_pr        _mm_max_ps
 +#define gmx_cmplt_pr      _mm_cmplt_ps
 +#define gmx_and_pr        _mm_and_ps
 +#define gmx_or_pr         _mm_or_ps
 +#define gmx_andnot_pr     _mm_andnot_ps
 +
 +#define gmx_floor_pr      _mm_floor_ps /* see file NOTE: not part of SSE2-only acceleration */
 +#define gmx_blendv_pr     _mm_blendv_ps /* see file NOTE: not part of SSE2-only acceleration */
 +
 +#define gmx_movemask_pr   _mm_movemask_ps
 +
 +#define gmx_mm_castsi128_pr gmx_mm_castsi128_ps
 +
 +#define gmx_cvttpr_epi32  _mm_cvttps_epi32
 +#define gmx_cvtepi32_pr   _mm_cvtepi32_ps
 +
 +#define gmx_invsqrt_pr    gmx_mm_invsqrt_ps
 +#define gmx_calc_rsq_pr   gmx_mm_calc_rsq_ps
 +#define gmx_sum4_pr       gmx_mm_sum4_ps
 +
 +#define gmx_pmecorrF_pr   gmx_mm_pmecorrF_ps
 +#define gmx_pmecorrV_pr   gmx_mm_pmecorrV_ps
 +
 +#else /* ifndef GMX_DOUBLE */
 +
 +#include "gmx_x86_simd_double.h"
 +
 +#define GMX_SIMD_WIDTH_HERE  2
 +
 +#define gmx_mm_pr  __m128d
 +
 +#define gmx_load_pr       _mm_load_pd
 +#define gmx_load1_pr      _mm_load1_pd
 +#define gmx_set1_pr       _mm_set1_pd
 +#define gmx_setzero_pr    _mm_setzero_pd
 +#define gmx_store_pr      _mm_store_pd
 +#define gmx_storeu_pr     _mm_storeu_pd
 +
 +#define gmx_add_pr        _mm_add_pd
 +#define gmx_sub_pr        _mm_sub_pd
 +#define gmx_mul_pr        _mm_mul_pd
 +#define gmx_max_pr        _mm_max_pd
 +#define gmx_cmplt_pr      _mm_cmplt_pd
 +#define gmx_and_pr        _mm_and_pd
 +#define gmx_or_pr         _mm_or_pd
 +#define gmx_andnot_pr     _mm_andnot_pd
 +
 +#define gmx_floor_pr      _mm_floor_pd /* see file NOTE: not part of SSE2-only acceleration */
 +#define gmx_blendv_pr     _mm_blendv_pd /* see file NOTE: not part of SSE2-only acceleration */
 +
 +#define gmx_movemask_pr   _mm_movemask_pd
 +
 +#define gmx_mm_castsi128_pr gmx_mm_castsi128_pd
 +
 +#define gmx_cvttpr_epi32  _mm_cvttpd_epi32
 +#define gmx_cvtepi32_pr   _mm_cvtepi32_pd
 +
 +#define gmx_invsqrt_pr    gmx_mm_invsqrt_pd
 +#define gmx_calc_rsq_pr   gmx_mm_calc_rsq_pd
 +#define gmx_sum4_pr       gmx_mm_sum4_pd
 +
 +#define gmx_pmecorrF_pr   gmx_mm_pmecorrF_pd
 +#define gmx_pmecorrV_pr   gmx_mm_pmecorrV_pd
 +
 +#endif /* ifndef GMX_DOUBLE */
 +
 +#endif /* GMX_MM128_HERE */
 +
 +#ifdef GMX_MM256_HERE /* map the gmx_*_pr macros onto 256-bit _mm256_* intrinsics */
 +
 +#define gmx_epi32 __m256i
 +
 +#ifndef GMX_DOUBLE /* single precision: _pr -> _ps */
 +
 +#include "gmx_x86_simd_single.h"
 +
 +#define GMX_SIMD_WIDTH_HERE  8
 +
 +#define gmx_mm_pr  __m256
 +
 +#define gmx_load_pr       _mm256_load_ps
 +#define gmx_load1_pr(x)   _mm256_set1_ps((x)[0])
 +#define gmx_set1_pr       _mm256_set1_ps
 +#define gmx_setzero_pr    _mm256_setzero_ps
 +#define gmx_store_pr      _mm256_store_ps
 +#define gmx_storeu_pr     _mm256_storeu_ps
 +
 +#define gmx_add_pr        _mm256_add_ps
 +#define gmx_sub_pr        _mm256_sub_ps
 +#define gmx_mul_pr        _mm256_mul_ps
 +#define gmx_max_pr        _mm256_max_ps
 +/* Not-equal (ordered, non-signaling): predicate 0x0c is _CMP_NEQ_OQ */
 +#define gmx_cmpneq_pr(x, y)  _mm256_cmp_ps(x, y, 0x0c)
 +/* Less-than (ordered, non-signaling): predicate 0x11 is _CMP_LT_OQ */
 +#define gmx_cmplt_pr(x, y) _mm256_cmp_ps(x, y, 0x11)
 +#define gmx_and_pr        _mm256_and_ps
 +#define gmx_or_pr         _mm256_or_ps
 +#define gmx_andnot_pr     _mm256_andnot_ps
 +
 +#define gmx_floor_pr      _mm256_floor_ps
 +#define gmx_blendv_pr     _mm256_blendv_ps
 +
 +#define gmx_movemask_pr   _mm256_movemask_ps
 +
 +#define gmx_mm_castsi256_pr _mm256_castsi256_ps
 +
 +#define gmx_cvttpr_epi32  _mm256_cvttps_epi32 /* NOTE(review): no gmx_cvtepi32_pr mapping in this branch */
 +
 +#define gmx_invsqrt_pr    gmx_mm256_invsqrt_ps
 +#define gmx_calc_rsq_pr   gmx_mm256_calc_rsq_ps
 +#define gmx_sum4_pr       gmx_mm256_sum4_ps
 +
 +#define gmx_pmecorrF_pr   gmx_mm256_pmecorrF_ps
 +#define gmx_pmecorrV_pr   gmx_mm256_pmecorrV_ps
 +
 +#define gmx_loaddh_pr     gmx_mm256_load4_ps
 +
 +/* Half SIMD-width type */
 +#define gmx_mm_hpr  __m128
 +
 +/* Half SIMD-width macros */
 +#define gmx_load_hpr      _mm_load_ps
 +#define gmx_load1_hpr(x)  _mm_set1_ps((x)[0])
 +#define gmx_store_hpr     _mm_store_ps
 +#define gmx_add_hpr       _mm_add_ps
 +#define gmx_sub_hpr       _mm_sub_ps
 +
 +#define gmx_sum4_hpr      gmx_mm256_sum4h_m128
 +
 +/* Conversion between half and full SIMD-width */
 +#define gmx_2hpr_to_pr    gmx_mm256_set_m128
 +
 +#else /* ifndef GMX_DOUBLE */
 +
 +#include "gmx_x86_simd_double.h"
 +
 +#define GMX_SIMD_WIDTH_HERE  4
 +
 +#define gmx_mm_pr  __m256d
 +
 +#define gmx_load_pr       _mm256_load_pd
 +#define gmx_load1_pr(x)   _mm256_set1_pd((x)[0])
 +#define gmx_set1_pr       _mm256_set1_pd
 +#define gmx_setzero_pr    _mm256_setzero_pd
 +#define gmx_store_pr      _mm256_store_pd
 +#define gmx_storeu_pr     _mm256_storeu_pd
 +
 +#define gmx_add_pr        _mm256_add_pd
 +#define gmx_sub_pr        _mm256_sub_pd
 +#define gmx_mul_pr        _mm256_mul_pd
 +#define gmx_max_pr        _mm256_max_pd
 +/* Not-equal (ordered, non-signaling): predicate 0x0c is _CMP_NEQ_OQ */
 +#define gmx_cmpneq_pr(x, y)  _mm256_cmp_pd(x, y, 0x0c)
 +/* Less-than (ordered, non-signaling): predicate 0x11 is _CMP_LT_OQ */
 +#define gmx_cmplt_pr(x, y) _mm256_cmp_pd(x, y, 0x11)
 +#define gmx_and_pr        _mm256_and_pd
 +#define gmx_or_pr         _mm256_or_pd
 +#define gmx_andnot_pr     _mm256_andnot_pd
 +
 +#define gmx_floor_pr      _mm256_floor_pd
 +#define gmx_blendv_pr     _mm256_blendv_pd
 +
 +#define gmx_movemask_pr   _mm256_movemask_pd
 +
 +#define gmx_mm_castsi256_pr _mm256_castsi256_pd
 +
 +#define gmx_cvttpr_epi32  _mm256_cvttpd_epi32 /* NOTE(review): no gmx_cvtepi32_pr mapping in this branch */
 +
 +#define gmx_invsqrt_pr    gmx_mm256_invsqrt_pd
 +#define gmx_calc_rsq_pr   gmx_mm256_calc_rsq_pd
 +#define gmx_sum4_pr       gmx_mm256_sum4_pd
 +
 +#define gmx_pmecorrF_pr   gmx_mm256_pmecorrF_pd
 +#define gmx_pmecorrV_pr   gmx_mm256_pmecorrV_pd
 +
 +#endif /* ifndef GMX_DOUBLE */
 +
 +#endif /* GMX_MM256_HERE */
 +
 +#endif /* GMX_X86_SSE2 */
index cd3240ea17bcf48d2b6b226ce66833b8b2a82f66,0000000000000000000000000000000000000000..5639311cb882c9003f006ccfaeb676e3a3001e7f
mode 100644,000000..100644
--- /dev/null
@@@ -1,249 -1,0 +1,251 @@@
-     real                    *nbfp_s4;         /* As nbfp, but with stride 4, size ntype^2*4         */
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + *          GROningen MAchine for Chemical Simulations
 + *
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2012, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 + *
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + *
 + * For more info, check our website at http://www.gromacs.org
 + *
 + * And Hey:
 + * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
 + */
 +
 +#ifndef _nbnxn_pairlist_h
 +#define _nbnxn_pairlist_h
 +
 +#ifdef __cplusplus
 +extern "C" {
 +#endif
 +
 +/* A buffer data structure of 64 bytes (16 ints, assuming a 4-byte int)
 + * to be placed at the beginning and end of structs
 + * to avoid cache invalidation of the real contents
 + * of the struct by writes to neighboring memory.
 + */
 +typedef struct {
 +    int dummy[16]; /* Padding only */
 +} gmx_cache_protect_t;
 +
 +/* Abstract type for pair searching data.
 + * struct nbnxn_search is only named here; its members are not
 + * declared in this header.
 + */
 +typedef struct nbnxn_search * nbnxn_search_t;
 +
 +/* Allocation callback type: the function should set *ptr to point
 + * to memory of size nbytes.
 + * Error handling should be done within this function; the callback
 + * returns void, so it cannot report failure to its caller.
 + */
 +typedef void nbnxn_alloc_t (void **ptr, size_t nbytes);
 +
 +/* Deallocation callback type: the function should free the memory
 + * pointed to by ptr.
 + * NULL should not be passed to this function.
 + */
 +typedef void nbnxn_free_t (void *ptr);
 +/* One j-cluster entry of the simple pair list (see cj in nbnxn_pairlist_t) */
 +typedef struct {
 +    int      cj;    /* The j-cluster                    */
 +    unsigned excl;  /* The exclusion (interaction) bits */
 +} nbnxn_cj_t;
 +
 +/* In nbnxn_ci_t the integer shift contains the shift in the lower 7 bits;
 + * NBNXN_CI_SHIFT (127 = 0x7f) is the mask covering exactly those bits.
 + * The upper bits contain information for non-bonded kernel optimization:
 + * three consecutive flag bits per subc, starting at bit 7 for subc=0.
 + * Simply calculating LJ and Coulomb for all pairs in a cluster pair is fine.
 + * But three flags can be used to skip interactions, currently only for subc=0
 + * !(shift & NBNXN_CI_DO_LJ(subc))   => we can skip LJ for all pairs
 + * shift & NBNXN_CI_HALF_LJ(subc)    => we can skip LJ for the second half of i
 + * !(shift & NBNXN_CI_DO_COUL(subc)) => we can skip Coulomb for all pairs
 + */
 +#define NBNXN_CI_SHIFT          127
 +#define NBNXN_CI_DO_LJ(subc)    (1<<(7+3*(subc)))
 +#define NBNXN_CI_HALF_LJ(subc)  (1<<(8+3*(subc)))
 +#define NBNXN_CI_DO_COUL(subc)  (1<<(9+3*(subc)))
 +
 +/* Simple pair-list i-unit: one i-cluster plus the index range of its
 + * j-cluster entries in the cj array.
 + */
 +typedef struct {
 +    int ci;             /* i-cluster             */
 +    int shift;          /* Shift vector index plus possible flags, see above */
 +    int cj_ind_start;   /* Start index into cj   */
 +    int cj_ind_end;     /* End index into cj     */
 +} nbnxn_ci_t;
 +
 +/* Grouped pair-list i-unit: one i-super-cluster plus the index range of
 + * its entries in the cj4 array.
 + */
 +typedef struct {
 +    int sci;            /* i-super-cluster       */
 +    int shift;          /* Shift vector index plus possible flags */
 +    int cj4_ind_start;  /* Start index into cj4  */
 +    int cj4_ind_end;    /* End index into cj4    */
 +} nbnxn_sci_t;
 +/* Interaction mask and exclusion index for one warp (member type of nbnxn_cj4_t) */
 +typedef struct {
 +    unsigned imask;        /* The i-cluster interactions mask for 1 warp  */
 +    int      excl_ind;     /* Index into the exclusion array for 1 warp   */
 +} nbnxn_im_ei_t;
 +/* A group of 4 j-clusters together with the mask data for 2 warps */
 +typedef struct {
 +    int           cj[4];   /* The 4 j-clusters                            */
 +    nbnxn_im_ei_t imei[2]; /* The i-cluster mask data       for 2 warps   */
 +} nbnxn_cj4_t;
 +/* Exclusion mask data for one warp (element type of excl in nbnxn_pairlist_t) */
 +typedef struct {
 +    unsigned pair[32];     /* Exclusion bits for one warp,                *
 +                            * each unsigned has bits for 4*8 i clusters   */
 +} nbnxn_excl_t;
 +/* A single pair list. bSimple tells which members are in use: the simple
 + * list uses ci and cj, the complex list uses sci, cj4 and excl.
 + * cp0 and cp1 are cache-protection padding at both ends of the struct.
 + */
 +typedef struct {
 +    gmx_cache_protect_t cp0;
 +
 +    nbnxn_alloc_t      *alloc;           /* Allocation callback, see nbnxn_alloc_t   */
 +    nbnxn_free_t       *free;            /* Deallocation callback, see nbnxn_free_t  */
 +
 +    gmx_bool            bSimple;         /* Simple list has na_sc=na_s and uses cj   *
 +                                          * Complex list uses cj4                    */
 +
 +    int                     na_ci;       /* The number of atoms per i-cluster        */
 +    int                     na_cj;       /* The number of atoms per j-cluster        */
 +    int                     na_sc;       /* The number of atoms per super cluster    */
 +    real                    rlist;       /* The radius for constructing the list     */
 +    int                     nci;         /* The number of i-clusters in the list     */
 +    nbnxn_ci_t             *ci;          /* The i-cluster list, size nci             */
 +    int                     ci_nalloc;   /* The allocation size of ci                */
 +    int                     nsci;        /* The number of i-super-clusters in the list */
 +    nbnxn_sci_t            *sci;         /* The i-super-cluster list                 */
 +    int                     sci_nalloc;  /* The allocation size of sci               */
 +
 +    int                     ncj;         /* The number of j-clusters in the list     */
 +    nbnxn_cj_t             *cj;          /* The j-cluster list, size ncj             */
 +    int                     cj_nalloc;   /* The allocation size of cj                */
 +
 +    int                     ncj4;        /* The total number of 4*j clusters         */
 +    nbnxn_cj4_t            *cj4;         /* The 4*j cluster list, size ncj4          */
 +    int                     cj4_nalloc;  /* The allocation size of cj4               */
 +    int                     nexcl;       /* The count for excl                       */
 +    nbnxn_excl_t           *excl;        /* Atom interaction bits (non-exclusions)   */
 +    int                     excl_nalloc; /* The allocation size for excl             */
 +    int                     nci_tot;     /* The total number of i clusters           */
 +
 +    struct nbnxn_list_work *work;        /* Struct not declared in this header       */
 +
 +    gmx_cache_protect_t     cp1;
 +} nbnxn_pairlist_t;
 +/* A set of nnbl pair lists plus atom-pair totals per kernel flavor */
 +typedef struct {
 +    int                nnbl;        /* number of lists */
 +    nbnxn_pairlist_t **nbl;         /* lists */
 +    gmx_bool           bCombined;   /* TRUE if lists get combined into one (the 1st) */
 +    gmx_bool           bSimple;     /* TRUE if the list is of type "simple"
 +                                       (na_sc=na_s, no super-clusters used) */
 +    int                natpair_ljq; /* Total number of atom pairs for LJ+Q kernel */
 +    int                natpair_lj;  /* Total number of atom pairs for LJ kernel   */
 +    int                natpair_q;   /* Total number of atom pairs for Q kernel    */
 +} nbnxn_pairlist_set_t;
 +/* NOTE(review): presumably the array format values stored in XFormat and
 + * FFormat of nbnxn_atomdata_t -- confirm against the code that sets them.
 + */
 +enum {
 +    nbatXYZ, nbatXYZQ, nbatX4, nbatX8
 +};
 +/* Force and energy output buffers; nbnxn_atomdata_t holds nout of these in out */
 +typedef struct {
 +    real *f;      /* f, size natoms*fstride                             */
 +    real *fshift; /* Shift force array, size SHIFTS*DIM                 */
 +    int   nV;     /* The size of *Vvdw and *Vc                          */
 +    real *Vvdw;   /* Temporary Van der Waals group energy storage       */
 +    real *Vc;     /* Temporary Coulomb group energy storage             */
 +    int   nVS;    /* The size of *VSvdw and *VSc                        */
 +    real *VSvdw;  /* Temporary SIMD Van der Waals group energy storage  */
 +    real *VSc;    /* Temporary SIMD Coulomb group energy storage        */
 +} nbnxn_atomdata_output_t;
 +
 +/* Block size in atoms for the non-bonded thread force-buffer reduction,
 + * should be a multiple of all cell and x86 SIMD sizes (i.e. 2, 4 and 8).
 + * Should be small to reduce the reduction and zeroing cost,
 + * but too small will result in overhead.
 + * Currently the block size is NBNXN_BUFFERFLAG_SIZE*3*sizeof(real)=192 bytes
 + * (8*3*8 with GMX_DOUBLE, 16*3*4 otherwise; this assumes real is an 8-byte
 + * double resp. a 4-byte float).
 + */
 +#ifdef GMX_DOUBLE
 +#define NBNXN_BUFFERFLAG_SIZE   8
 +#else
 +#define NBNXN_BUFFERFLAG_SIZE  16
 +#endif
 +
 +/* We currently store the reduction flags as bits in an unsigned int
 + * (see flag in nbnxn_buffer_flags_t, one bit per thread).
 + * In most cases this limits the number of flags to 32.
 + * The reduction will automatically disable the flagging and do a full
 + * reduction when the flags won't fit, but this will lead to very slow
 + * reduction. As we anyhow don't expect reasonable performance with
 + * more than 32 threads, we put in this hard limit.
 + * You can increase this number, but the reduction will be very slow.
 + */
 +#define NBNXN_BUFFERFLAG_MAX_THREADS  32
 +
 +/* Flags for telling if threads write to force output buffers */
 +typedef struct {
 +    int       nflag;       /* The number of flag blocks                         */
 +    unsigned *flag;        /* Bit i is set when thread i writes to a cell-block */
 +    int       flag_nalloc; /* Allocation size of flag                           */
 +} nbnxn_buffer_flags_t;
 +
 +/* LJ combination rules: geometric, Lorentz-Berthelot, none.
 + * ljcrNR comes last and therefore equals the number of rule values (3).
 + */
 +enum {
 +    ljcrGEOM, ljcrLB, ljcrNONE, ljcrNR
 +};
 +/* Atom data: LJ parameters, atom types, charges, coordinates, shift vectors and force output buffers */
 +typedef struct {
 +    nbnxn_alloc_t           *alloc;
 +    nbnxn_free_t            *free;
 +    int                      ntype;           /* The number of different atom types                 */
 +    real                    *nbfp;            /* Lennard-Jones 6*C6 and 12*C12 params, size ntype^2*2 */
 +    int                      comb_rule;       /* Combination rule, see enum above                   */
 +    real                    *nbfp_comb;       /* LJ parameter per atom type, size ntype*2           */
++    real                    *nbfp_s4;         /* As nbfp, but with stride 4, size ntype^2*4. This
++                                               * might suit 4-wide SIMD loads of two values (e.g.
++                                               * two floats in single precision on x86).            */
 +    int                      natoms;          /* Number of atoms                                    */
 +    int                      natoms_local;    /* Number of local atoms                           */
 +    int                     *type;            /* Atom types                                         */
 +    real                    *lj_comb;         /* LJ parameters per atom for combining for pairs     */
 +    int                      XFormat;         /* The format of x (and q), enum                      */
 +    int                      FFormat;         /* The format of f, enum                              */
 +    real                    *q;               /* Charges, can be NULL if incorporated in x          */
 +    int                      na_c;            /* The number of atoms per cluster                    */
 +    int                      nenergrp;        /* The number of energy groups                        */
 +    int                      neg_2log;        /* Log2 of nenergrp                                   */
 +    int                     *energrp;         /* The energy groups per cluster, can be NULL         */
 +    gmx_bool                 bDynamicBox;     /* Do we need to update shift_vec every step?    */
 +    rvec                    *shift_vec;       /* Shift vectors, copied from t_forcerec              */
 +    int                      xstride;         /* stride for a coordinate in x (usually 3 or 4)      */
 +    int                      fstride;         /* stride for a coordinate in f (usually 3 or 4)      */
 +    real                    *x;               /* x and possibly q, size natoms*xstride              */
 +    real                    *simd_4xn_diag;   /* indices to set the SIMD 4xN diagonal masks    */
 +    real                    *simd_2xnn_diag;  /* indices to set the SIMD 2x(N+N)diagonal masks */
 +    int                      nout;            /* The number of force arrays                         */
 +    nbnxn_atomdata_output_t *out;             /* Output data structures               */
 +    int                      nalloc;          /* Allocation size of all arrays (for x/f *x/fstride) */
 +    gmx_bool                 bUseBufferFlags; /* Use the flags or operate on all atoms     */
 +    nbnxn_buffer_flags_t     buffer_flags;    /* Flags for buffer zeroing+reduc.  */
 +} nbnxn_atomdata_t;
 +
 +#ifdef __cplusplus
 +}
 +#endif
 +
 +#endif
index fec67cfaf63364d7de310adc3adc0145a0f3ff9f,0000000000000000000000000000000000000000..7680d93a42d42d7c17570931f747eb9058889f23
mode 100644,000000..100644
--- /dev/null
@@@ -1,2954 -1,0 +1,2958 @@@
-         printf("loading tf table for energygrp index %d from %s\n", ir->adress->tf_table_index[j], buf);
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + *          GROningen MAchine for Chemical Simulations
 + *
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + *
 + * For more info, check our website at http://www.gromacs.org
 + *
 + * And Hey:
 + * GROwing Monsters And Cloning Shrimps
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <math.h>
 +#include <string.h>
 +#include <assert.h>
 +#include "sysstuff.h"
 +#include "typedefs.h"
 +#include "vec.h"
 +#include "maths.h"
 +#include "macros.h"
 +#include "smalloc.h"
 +#include "macros.h"
 +#include "gmx_fatal.h"
 +#include "gmx_fatal_collective.h"
 +#include "physics.h"
 +#include "force.h"
 +#include "tables.h"
 +#include "nonbonded.h"
 +#include "invblock.h"
 +#include "names.h"
 +#include "network.h"
 +#include "pbc.h"
 +#include "ns.h"
 +#include "mshift.h"
 +#include "txtdump.h"
 +#include "coulomb.h"
 +#include "md_support.h"
 +#include "md_logging.h"
 +#include "domdec.h"
 +#include "partdec.h"
 +#include "qmmm.h"
 +#include "copyrite.h"
 +#include "mtop_util.h"
 +#include "nbnxn_search.h"
 +#include "nbnxn_atomdata.h"
 +#include "nbnxn_consts.h"
 +#include "statutil.h"
 +#include "gmx_omp_nthreads.h"
 +#include "gmx_detect_hardware.h"
 +
 +#ifdef _MSC_VER
 +/* MSVC definition for __cpuid() */
 +#include <intrin.h>
 +#endif
 +
 +#include "types/nbnxn_cuda_types_ext.h"
 +#include "gpu_utils.h"
 +#include "nbnxn_cuda_data_mgmt.h"
 +#include "pmalloc_cuda.h"
 +
 +t_forcerec *mk_forcerec(void)
 +{
 +    t_forcerec *fr;
 +
 +    snew(fr, 1);
 +
 +    return fr;
 +}
 +
 +#ifdef DEBUG
 +static void pr_nbfp(FILE *fp, real *nbfp, gmx_bool bBHAM, int atnr)
 +{
 +    int i, j;
 +
 +    for (i = 0; (i < atnr); i++)
 +    {
 +        for (j = 0; (j < atnr); j++)
 +        {
 +            fprintf(fp, "%2d - %2d", i, j);
 +            if (bBHAM)
 +            {
 +                fprintf(fp, "  a=%10g, b=%10g, c=%10g\n", BHAMA(nbfp, atnr, i, j),
 +                        BHAMB(nbfp, atnr, i, j), BHAMC(nbfp, atnr, i, j)/6.0);
 +            }
 +            else
 +            {
 +                fprintf(fp, "  c6=%10g, c12=%10g\n", C6(nbfp, atnr, i, j)/6.0,
 +                        C12(nbfp, atnr, i, j)/12.0);
 +            }
 +        }
 +    }
 +}
 +#endif
 +
 +static real *mk_nbfp(const gmx_ffparams_t *idef, gmx_bool bBHAM)
 +{
 +    real *nbfp;
 +    int   i, j, k, atnr;
 +
 +    atnr = idef->atnr;
 +    if (bBHAM)
 +    {
 +        snew(nbfp, 3*atnr*atnr);
 +        for (i = k = 0; (i < atnr); i++)
 +        {
 +            for (j = 0; (j < atnr); j++, k++)
 +            {
 +                BHAMA(nbfp, atnr, i, j) = idef->iparams[k].bham.a;
 +                BHAMB(nbfp, atnr, i, j) = idef->iparams[k].bham.b;
 +                /* nbfp now includes the 6.0 derivative prefactor */
 +                BHAMC(nbfp, atnr, i, j) = idef->iparams[k].bham.c*6.0;
 +            }
 +        }
 +    }
 +    else
 +    {
 +        snew(nbfp, 2*atnr*atnr);
 +        for (i = k = 0; (i < atnr); i++)
 +        {
 +            for (j = 0; (j < atnr); j++, k++)
 +            {
 +                /* nbfp now includes the 6.0/12.0 derivative prefactors */
 +                C6(nbfp, atnr, i, j)   = idef->iparams[k].lj.c6*6.0;
 +                C12(nbfp, atnr, i, j)  = idef->iparams[k].lj.c12*12.0;
 +            }
 +        }
 +    }
 +
 +    return nbfp;
 +}
 +
 +/* This routine sets fr->solvent_opt to the most common solvent in the
 + * system, e.g. esolSPC or esolTIP4P. It will also mark each charge group in
 + * the fr->solvent_type array with the correct type (or esolNO).
 + *
 + * Charge groups that fulfill the conditions but are not identical to the
 + * most common one will be marked as esolNO in the solvent_type array.
 + *
 + * TIP3p is identical to SPC for these purposes, so we call it
 + * SPC in the arrays (Apologies to Bill Jorgensen ;-)
 + *
 + * NOTE: A QM particle should not
 + * become an optimized solvent, not even if there is only one charge
 + * group in the QM subsystem.
 + */
 +/* One distinct solvent candidate as collected by check_solvent_cg() */
 +typedef struct
 +{
 +    int    model;      /* esolSPC or esolTIP4P                              */
 +    int    count;      /* Sum of nmol over all charge groups of this type   */
 +    int    vdwtype[4]; /* Atom type per atom; 3 entries set for esolSPC     */
 +    real   charge[4];  /* Charge per atom; 3 entries set for esolSPC        */
 +} solvent_parameters_t;
 +
 +static void
 +check_solvent_cg(const gmx_moltype_t    *molt,
 +                 int                     cg0,
 +                 int                     nmol,
 +                 const unsigned char    *qm_grpnr,
 +                 const t_grps           *qm_grps,
 +                 t_forcerec   *          fr,
 +                 int                    *n_solvent_parameters,
 +                 solvent_parameters_t  **solvent_parameters_p,
 +                 int                     cginfo,
 +                 int                    *cg_sp)
 +{
 +    const t_blocka     *  excl;
 +    t_atom               *atom;
 +    int                   j, k;
 +    int                   j0, j1, nj;
 +    gmx_bool              perturbed;
 +    gmx_bool              has_vdw[4];
 +    gmx_bool              match;
 +    real                  tmp_charge[4];
 +    int                   tmp_vdwtype[4];
 +    int                   tjA;
 +    gmx_bool              qm;
 +    solvent_parameters_t *solvent_parameters;
 +
 +    /* We use a list with parameters for each solvent type.
 +     * Every time we discover a new molecule that fulfills the basic
 +     * conditions for a solvent we compare with the previous entries
 +     * in these lists. If the parameters are the same we just increment
 +     * the counter for that type, and otherwise we create a new type
 +     * based on the current molecule.
 +     *
 +     * Once we've finished going through all molecules we check which
 +     * solvent is most common, and mark all those molecules while we
 +     * clear the flag on all others.
 +     */
 +
 +    solvent_parameters = *solvent_parameters_p;
 +
 +    /* Mark the cg first as non optimized */
 +    *cg_sp = -1;
 +
 +    /* Check if this cg has no exclusions with atoms in other charge groups
 +     * and all atoms inside the charge group excluded.
 +     * We only have 3 or 4 atom solvent loops.
 +     */
 +    if (GET_CGINFO_EXCL_INTER(cginfo) ||
 +        !GET_CGINFO_EXCL_INTRA(cginfo))
 +    {
 +        return;
 +    }
 +
 +    /* Get the indices of the first atom in this charge group */
 +    j0     = molt->cgs.index[cg0];
 +    j1     = molt->cgs.index[cg0+1];
 +
 +    /* Number of atoms in our molecule */
 +    nj     = j1 - j0;
 +
 +    if (debug)
 +    {
 +        fprintf(debug,
 +                "Moltype '%s': there are %d atoms in this charge group\n",
 +                *molt->name, nj);
 +    }
 +
 +    /* Check if it could be an SPC (3 atoms) or TIP4p (4) water,
 +     * otherwise skip it.
 +     */
 +    if (nj < 3 || nj > 4)
 +    {
 +        return;
 +    }
 +
 +    /* Check if we are doing QM on this group */
 +    qm = FALSE;
 +    if (qm_grpnr != NULL)
 +    {
 +        for (j = j0; j < j1 && !qm; j++)
 +        {
 +            qm = (qm_grpnr[j] < qm_grps->nr - 1);
 +        }
 +    }
 +    /* Cannot use solvent optimization with QM */
 +    if (qm)
 +    {
 +        return;
 +    }
 +
 +    atom = molt->atoms.atom;
 +
 +    /* Still looks like a solvent, time to check parameters */
 +
 +    /* If it is perturbed (free energy) we can't use the solvent loops,
 +     * so then we just skip to the next molecule.
 +     */
 +    perturbed = FALSE;
 +
 +    for (j = j0; j < j1 && !perturbed; j++)
 +    {
 +        perturbed = PERTURBED(atom[j]);
 +    }
 +
 +    if (perturbed)
 +    {
 +        return;
 +    }
 +
 +    /* Now it's only a question if the VdW and charge parameters
 +     * are OK. Before doing the check we compare and see if they are
 +     * identical to a possible previous solvent type.
 +     * First we assign the current types and charges.
 +     */
 +    for (j = 0; j < nj; j++)
 +    {
 +        tmp_vdwtype[j] = atom[j0+j].type;
 +        tmp_charge[j]  = atom[j0+j].q;
 +    }
 +
 +    /* Does it match any previous solvent type? */
 +    for (k = 0; k < *n_solvent_parameters; k++)
 +    {
 +        match = TRUE;
 +
 +
 +        /* We can only match SPC with 3 atoms and TIP4p with 4 atoms */
 +        if ( (solvent_parameters[k].model == esolSPC   && nj != 3)  ||
 +             (solvent_parameters[k].model == esolTIP4P && nj != 4) )
 +        {
 +            match = FALSE;
 +        }
 +
 +        /* Check that types & charges match for all atoms in molecule */
 +        for (j = 0; j < nj && match == TRUE; j++)
 +        {
 +            if (tmp_vdwtype[j] != solvent_parameters[k].vdwtype[j])
 +            {
 +                match = FALSE;
 +            }
 +            if (tmp_charge[j] != solvent_parameters[k].charge[j])
 +            {
 +                match = FALSE;
 +            }
 +        }
 +        if (match == TRUE)
 +        {
 +            /* Congratulations! We have a matched solvent.
 +             * Flag it with this type for later processing.
 +             */
 +            *cg_sp = k;
 +            solvent_parameters[k].count += nmol;
 +
 +            /* We are done with this charge group */
 +            return;
 +        }
 +    }
 +
 +    /* If we get here, we have a tentative new solvent type.
 +     * Before we add it we must check that it fulfills the requirements
 +     * of the solvent optimized loops. First determine which atoms have
 +     * VdW interactions.
 +     */
 +    for (j = 0; j < nj; j++)
 +    {
 +        has_vdw[j] = FALSE;
 +        tjA        = tmp_vdwtype[j];
 +
 +        /* Go through all other tpes and see if any have non-zero
 +         * VdW parameters when combined with this one.
 +         */
 +        for (k = 0; k < fr->ntype && (has_vdw[j] == FALSE); k++)
 +        {
 +            /* We already checked that the atoms weren't perturbed,
 +             * so we only need to check state A now.
 +             */
 +            if (fr->bBHAM)
 +            {
 +                has_vdw[j] = (has_vdw[j] ||
 +                              (BHAMA(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
 +                              (BHAMB(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
 +                              (BHAMC(fr->nbfp, fr->ntype, tjA, k) != 0.0));
 +            }
 +            else
 +            {
 +                /* Standard LJ */
 +                has_vdw[j] = (has_vdw[j] ||
 +                              (C6(fr->nbfp, fr->ntype, tjA, k)  != 0.0) ||
 +                              (C12(fr->nbfp, fr->ntype, tjA, k) != 0.0));
 +            }
 +        }
 +    }
 +
 +    /* Now we know all we need to make the final check and assignment. */
 +    if (nj == 3)
 +    {
 +        /* So, is it an SPC?
 +         * For this we require thatn all atoms have charge,
 +         * the charges on atom 2 & 3 should be the same, and only
 +         * atom 1 might have VdW.
 +         */
 +        if (has_vdw[1] == FALSE &&
 +            has_vdw[2] == FALSE &&
 +            tmp_charge[0]  != 0 &&
 +            tmp_charge[1]  != 0 &&
 +            tmp_charge[2]  == tmp_charge[1])
 +        {
 +            srenew(solvent_parameters, *n_solvent_parameters+1);
 +            solvent_parameters[*n_solvent_parameters].model = esolSPC;
 +            solvent_parameters[*n_solvent_parameters].count = nmol;
 +            for (k = 0; k < 3; k++)
 +            {
 +                solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
 +                solvent_parameters[*n_solvent_parameters].charge[k]  = tmp_charge[k];
 +            }
 +
 +            *cg_sp = *n_solvent_parameters;
 +            (*n_solvent_parameters)++;
 +        }
 +    }
 +    else if (nj == 4)
 +    {
 +        /* Or could it be a TIP4P?
 +         * For this we require thatn atoms 2,3,4 have charge, but not atom 1.
 +         * Only atom 1 mght have VdW.
 +         */
 +        if (has_vdw[1] == FALSE &&
 +            has_vdw[2] == FALSE &&
 +            has_vdw[3] == FALSE &&
 +            tmp_charge[0]  == 0 &&
 +            tmp_charge[1]  != 0 &&
 +            tmp_charge[2]  == tmp_charge[1] &&
 +            tmp_charge[3]  != 0)
 +        {
 +            srenew(solvent_parameters, *n_solvent_parameters+1);
 +            solvent_parameters[*n_solvent_parameters].model = esolTIP4P;
 +            solvent_parameters[*n_solvent_parameters].count = nmol;
 +            for (k = 0; k < 4; k++)
 +            {
 +                solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
 +                solvent_parameters[*n_solvent_parameters].charge[k]  = tmp_charge[k];
 +            }
 +
 +            *cg_sp = *n_solvent_parameters;
 +            (*n_solvent_parameters)++;
 +        }
 +    }
 +
 +    *solvent_parameters_p = solvent_parameters;
 +}
 +
 +static void
 +check_solvent(FILE  *                fp,
 +              const gmx_mtop_t  *    mtop,
 +              t_forcerec  *          fr,
 +              cginfo_mb_t           *cginfo_mb)
 +{
 +    const t_block     *   cgs;
 +    const t_block     *   mols;
 +    const gmx_moltype_t  *molt;
 +    int                   mb, mol, cg_mol, at_offset, cg_offset, am, cgm, i, nmol_ch, nmol;
 +    int                   n_solvent_parameters;
 +    solvent_parameters_t *solvent_parameters;
 +    int                 **cg_sp;
 +    int                   bestsp, bestsol;
 +
 +    if (debug)
 +    {
 +        fprintf(debug, "Going to determine what solvent types we have.\n");
 +    }
 +
 +    mols = &mtop->mols;
 +
 +    n_solvent_parameters = 0;
 +    solvent_parameters   = NULL;
 +    /* Allocate temporary array for solvent type */
 +    snew(cg_sp, mtop->nmolblock);
 +
 +    cg_offset = 0;
 +    at_offset = 0;
 +    for (mb = 0; mb < mtop->nmolblock; mb++)
 +    {
 +        molt = &mtop->moltype[mtop->molblock[mb].type];
 +        cgs  = &molt->cgs;
 +        /* Here we have to loop over all individual molecules
 +         * because we need to check for QMMM particles.
 +         */
 +        snew(cg_sp[mb], cginfo_mb[mb].cg_mod);
 +        nmol_ch = cginfo_mb[mb].cg_mod/cgs->nr;
 +        nmol    = mtop->molblock[mb].nmol/nmol_ch;
 +        for (mol = 0; mol < nmol_ch; mol++)
 +        {
 +            cgm = mol*cgs->nr;
 +            am  = mol*cgs->index[cgs->nr];
 +            for (cg_mol = 0; cg_mol < cgs->nr; cg_mol++)
 +            {
 +                check_solvent_cg(molt, cg_mol, nmol,
 +                                 mtop->groups.grpnr[egcQMMM] ?
 +                                 mtop->groups.grpnr[egcQMMM]+at_offset+am : 0,
 +                                 &mtop->groups.grps[egcQMMM],
 +                                 fr,
 +                                 &n_solvent_parameters, &solvent_parameters,
 +                                 cginfo_mb[mb].cginfo[cgm+cg_mol],
 +                                 &cg_sp[mb][cgm+cg_mol]);
 +            }
 +        }
 +        cg_offset += cgs->nr;
 +        at_offset += cgs->index[cgs->nr];
 +    }
 +
 +    /* Puh! We finished going through all charge groups.
 +     * Now find the most common solvent model.
 +     */
 +
 +    /* Most common solvent this far */
 +    bestsp = -2;
 +    for (i = 0; i < n_solvent_parameters; i++)
 +    {
 +        if (bestsp == -2 ||
 +            solvent_parameters[i].count > solvent_parameters[bestsp].count)
 +        {
 +            bestsp = i;
 +        }
 +    }
 +
 +    if (bestsp >= 0)
 +    {
 +        bestsol = solvent_parameters[bestsp].model;
 +    }
 +    else
 +    {
 +        bestsol = esolNO;
 +    }
 +
 +#ifdef DISABLE_WATER_NLIST
 +    bestsol = esolNO;
 +#endif
 +
 +    fr->nWatMol = 0;
 +    for (mb = 0; mb < mtop->nmolblock; mb++)
 +    {
 +        cgs  = &mtop->moltype[mtop->molblock[mb].type].cgs;
 +        nmol = (mtop->molblock[mb].nmol*cgs->nr)/cginfo_mb[mb].cg_mod;
 +        for (i = 0; i < cginfo_mb[mb].cg_mod; i++)
 +        {
 +            if (cg_sp[mb][i] == bestsp)
 +            {
 +                SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], bestsol);
 +                fr->nWatMol += nmol;
 +            }
 +            else
 +            {
 +                SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], esolNO);
 +            }
 +        }
 +        sfree(cg_sp[mb]);
 +    }
 +    sfree(cg_sp);
 +
 +    if (bestsol != esolNO && fp != NULL)
 +    {
 +        fprintf(fp, "\nEnabling %s-like water optimization for %d molecules.\n\n",
 +                esol_names[bestsol],
 +                solvent_parameters[bestsp].count);
 +    }
 +
 +    sfree(solvent_parameters);
 +    fr->solvent_opt = bestsol;
 +}
 +/* NOTE(review): the names suggest atom constraint categories (none, constraint, SETTLE) -- confirm at the point of use */
 +enum {
 +    acNONE = 0, acCONSTRAINT, acSETTLE
 +};
 +
 +static cginfo_mb_t *init_cginfo_mb(FILE *fplog, const gmx_mtop_t *mtop,
 +                                   t_forcerec *fr, gmx_bool bNoSolvOpt,
 +                                   gmx_bool *bExcl_IntraCGAll_InterCGNone)
 +{   /* Build the charge-group info table: one cginfo_mb_t entry per molecule
 +     * block of mtop.
 +     *
 +     * For every block this fills cg_start, cg_end, cg_mod and a cginfo array
 +     * of cg_mod entries. Each cginfo entry receives the energy group id, the
 +     * constraint/SETTLE flags, the intra/inter charge-group exclusion flags,
 +     * the "has VdW" and "has charge" flags and the number of atoms of the
 +     * charge group. Afterwards check_solvent() is called, and the solvent
 +     * optimization is switched off again when the GMX_NO_SOLV_OPT environment
 +     * variable is set or bNoSolvOpt is TRUE.
 +     *
 +     * fplog       log file; it is tested against NULL before it is written to
 +     * mtop        global topology (molecule blocks, molecule types, groups)
 +     * fr          ntype, bBHAM and nbfp are read; solvent_opt is written
 +     * bNoSolvOpt  TRUE forces fr->solvent_opt to esolNO
 +     * bExcl_IntraCGAll_InterCGNone
 +     *             output: TRUE only when every processed charge group
 +     *             excludes all of its intra-group pairs and has no
 +     *             exclusion with an atom outside the group
 +     *
 +     * Returns the array allocated here with snew(); it is not freed in this
 +     * function.
 +     */
 +    const t_block        *cgs;
 +    const t_blocka       *excl;
 +    const gmx_moltype_t  *molt;
 +    const gmx_molblock_t *molb;
 +    cginfo_mb_t          *cginfo_mb;
 +    gmx_bool             *type_VDW;
 +    int                  *cginfo;
 +    int                   cg_offset, a_offset, cgm, am;
 +    int                   mb, m, ncg_tot, cg, a0, a1, gid, ai, j, aj, excl_nalloc;
 +    int                  *a_con;
 +    int                   ftype;
 +    int                   ia;
 +    gmx_bool              bId, *bExcl, bExclIntraAll, bExclInter, bHaveVDW, bHaveQ;
 +
 +    ncg_tot = ncg_mtop(mtop); /* NOTE(review): ncg_tot is not read again in this function */
 +    snew(cginfo_mb, mtop->nmolblock);
 +
 +    snew(type_VDW, fr->ntype); /* type_VDW[t]: TRUE when type t has a non-zero C6 or C12 with any type, or fr->bBHAM is set */
 +    for (ai = 0; ai < fr->ntype; ai++)
 +    {
 +        type_VDW[ai] = FALSE;
 +        for (j = 0; j < fr->ntype; j++)
 +        {
 +            type_VDW[ai] = type_VDW[ai] ||
 +                fr->bBHAM ||
 +                C6(fr->nbfp, fr->ntype, ai, j) != 0 ||
 +                C12(fr->nbfp, fr->ntype, ai, j) != 0;
 +        }
 +    }
 +
 +    *bExcl_IntraCGAll_InterCGNone = TRUE; /* cleared below by the first charge group that violates it */
 +
 +    excl_nalloc = 10; /* initial size of the bExcl scratch array, grown on demand */
 +    snew(bExcl, excl_nalloc);
 +    cg_offset = 0; /* global index of the first charge group of the current block */
 +    a_offset  = 0; /* global index of the first atom of the current block */
 +    for (mb = 0; mb < mtop->nmolblock; mb++)
 +    {
 +        molb = &mtop->molblock[mb];
 +        molt = &mtop->moltype[molb->type];
 +        cgs  = &molt->cgs;
 +        excl = &molt->excls;
 +
 +        /* Check if the cginfo is identical for all molecules in this block.
 +         * If so, we only need an array of the size of one molecule.
 +         * Otherwise we make an array of #mol times #cgs per molecule.
 +         * Only the energy group and, when present, the QMMM group of the
 +         * atoms are compared with those of the first molecule.
 +         */
 +        bId = TRUE;
 +        am  = 0;
 +        for (m = 0; m < molb->nmol; m++)
 +        {
 +            am = m*cgs->index[cgs->nr]; /* atom offset of molecule m within the block */
 +            for (cg = 0; cg < cgs->nr; cg++)
 +            {
 +                a0 = cgs->index[cg];
 +                a1 = cgs->index[cg+1];
 +                if (ggrpnr(&mtop->groups, egcENER, a_offset+am+a0) !=
 +                    ggrpnr(&mtop->groups, egcENER, a_offset   +a0))
 +                {
 +                    bId = FALSE;
 +                }
 +                if (mtop->groups.grpnr[egcQMMM] != NULL)
 +                {
 +                    for (ai = a0; ai < a1; ai++)
 +                    {
 +                        if (mtop->groups.grpnr[egcQMMM][a_offset+am+ai] !=
 +                            mtop->groups.grpnr[egcQMMM][a_offset   +ai])
 +                        {
 +                            bId = FALSE;
 +                        }
 +                    }
 +                }
 +            }
 +        }
 +
 +        cginfo_mb[mb].cg_start = cg_offset;
 +        cginfo_mb[mb].cg_end   = cg_offset + molb->nmol*cgs->nr;
 +        cginfo_mb[mb].cg_mod   = (bId ? 1 : molb->nmol)*cgs->nr; /* length of the cginfo array; cginfo_expand() indexes it modulo this value */
 +        snew(cginfo_mb[mb].cginfo, cginfo_mb[mb].cg_mod);
 +        cginfo = cginfo_mb[mb].cginfo;
 +
 +        /* Set constraints flags for constrained atoms */
 +        snew(a_con, molt->atoms.nr); /* presumably zero-filled by snew, i.e. acNONE - TODO confirm */
 +        for (ftype = 0; ftype < F_NRE; ftype++)
 +        {
 +            if (interaction_function[ftype].flags & IF_CONSTRAINT)
 +            {
 +                int nral;
 +
 +                nral = NRAL(ftype);
 +                for (ia = 0; ia < molt->ilist[ftype].nr; ia += 1+nral)
 +                {
 +                    int a;
 +
 +                    for (a = 0; a < nral; a++)
 +                    {
 +                        a_con[molt->ilist[ftype].iatoms[ia+1+a]] =
 +                            (ftype == F_SETTLE ? acSETTLE : acCONSTRAINT);
 +                    }
 +                }
 +            }
 +        }
 +
 +        for (m = 0; m < (bId ? 1 : molb->nmol); m++)
 +        {
 +            cgm = m*cgs->nr; /* index of the first cginfo entry of molecule m */
 +            am  = m*cgs->index[cgs->nr];
 +            for (cg = 0; cg < cgs->nr; cg++)
 +            {
 +                a0 = cgs->index[cg];
 +                a1 = cgs->index[cg+1];
 +
 +                /* Store the energy group in cginfo */
 +                gid = ggrpnr(&mtop->groups, egcENER, a_offset+am+a0);
 +                SET_CGINFO_GID(cginfo[cgm+cg], gid);
 +
 +                /* Check the intra/inter charge group exclusions */
 +                if (a1-a0 > excl_nalloc)
 +                {
 +                    excl_nalloc = a1 - a0;
 +                    srenew(bExcl, excl_nalloc);
 +                }
 +                /* bExclIntraAll: all intra cg interactions excluded
 +                 * bExclInter:    any inter cg interactions excluded
 +                 */
 +                bExclIntraAll = TRUE;
 +                bExclInter    = FALSE;
 +                bHaveVDW      = FALSE;
 +                bHaveQ        = FALSE;
 +                for (ai = a0; ai < a1; ai++)
 +                {
 +                    /* Check VDW and electrostatic interactions */
 +                    bHaveVDW = bHaveVDW || (type_VDW[molt->atoms.atom[ai].type] ||
 +                                            type_VDW[molt->atoms.atom[ai].typeB]);
 +                    bHaveQ  = bHaveQ    || (molt->atoms.atom[ai].q != 0 ||
 +                                            molt->atoms.atom[ai].qB != 0);
 +
 +                    /* Clear the exclusion list for atom ai */
 +                    for (aj = a0; aj < a1; aj++)
 +                    {
 +                        bExcl[aj-a0] = FALSE;
 +                    }
 +                    /* Loop over all the exclusions of atom ai */
 +                    for (j = excl->index[ai]; j < excl->index[ai+1]; j++)
 +                    {
 +                        aj = excl->a[j];
 +                        if (aj < a0 || aj >= a1)
 +                        {
 +                            bExclInter = TRUE;
 +                        }
 +                        else
 +                        {
 +                            bExcl[aj-a0] = TRUE;
 +                        }
 +                    }
 +                    /* Check if ai excludes a0 to a1 */
 +                    for (aj = a0; aj < a1; aj++)
 +                    {
 +                        if (!bExcl[aj-a0])
 +                        {
 +                            bExclIntraAll = FALSE;
 +                        }
 +                    }
 +
 +                    switch (a_con[ai]) /* one constrained atom is enough to flag the whole charge group */
 +                    {
 +                        case acCONSTRAINT:
 +                            SET_CGINFO_CONSTR(cginfo[cgm+cg]);
 +                            break;
 +                        case acSETTLE:
 +                            SET_CGINFO_SETTLE(cginfo[cgm+cg]);
 +                            break;
 +                        default:
 +                            break;
 +                    }
 +                }
 +                if (bExclIntraAll)
 +                {
 +                    SET_CGINFO_EXCL_INTRA(cginfo[cgm+cg]);
 +                }
 +                if (bExclInter)
 +                {
 +                    SET_CGINFO_EXCL_INTER(cginfo[cgm+cg]);
 +                }
 +                if (a1 - a0 > MAX_CHARGEGROUP_SIZE)
 +                {
 +                    /* The size in cginfo is currently only read with DD */
 +                    gmx_fatal(FARGS, "A charge group has size %d which is larger than the limit of %d atoms", a1-a0, MAX_CHARGEGROUP_SIZE);
 +                }
 +                if (bHaveVDW)
 +                {
 +                    SET_CGINFO_HAS_VDW(cginfo[cgm+cg]);
 +                }
 +                if (bHaveQ)
 +                {
 +                    SET_CGINFO_HAS_Q(cginfo[cgm+cg]);
 +                }
 +                /* Store the charge group size */
 +                SET_CGINFO_NATOMS(cginfo[cgm+cg], a1-a0);
 +
 +                if (!bExclIntraAll || bExclInter)
 +                {
 +                    *bExcl_IntraCGAll_InterCGNone = FALSE;
 +                }
 +            }
 +        }
 +
 +        sfree(a_con);
 +
 +        cg_offset += molb->nmol*cgs->nr;
 +        a_offset  += molb->nmol*cgs->index[cgs->nr];
 +    }
 +    sfree(bExcl);
 +
 +    /* the solvent optimizer is called after the QM is initialized,
 +     * because we don't want to have the QM subsystem to become an
 +     * optimized solvent
 +     */
 +
 +    check_solvent(fplog, mtop, fr, cginfo_mb);
 +
 +    if (getenv("GMX_NO_SOLV_OPT"))
 +    {
 +        if (fplog)
 +        {
 +            fprintf(fplog, "Found environment variable GMX_NO_SOLV_OPT.\n"
 +                    "Disabling all solvent optimization\n");
 +        }
 +        fr->solvent_opt = esolNO;
 +    }
 +    if (bNoSolvOpt)
 +    {
 +        fr->solvent_opt = esolNO;
 +    }
 +    if (!fr->solvent_opt) /* no solvent optimization: reset the per charge group solvent flag everywhere */
 +    {
 +        for (mb = 0; mb < mtop->nmolblock; mb++)
 +        {
 +            for (cg = 0; cg < cginfo_mb[mb].cg_mod; cg++)
 +            {
 +                SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[cg], esolNO);
 +            }
 +        }
 +    }
 +
 +    return cginfo_mb;
 +}
 +
 +static int *cginfo_expand(int nmb, cginfo_mb_t *cgi_mb)
 +{
 +    int  ncg, mb, cg;
 +    int *cginfo;
 +
 +    ncg = cgi_mb[nmb-1].cg_end;
 +    snew(cginfo, ncg);
 +    mb = 0;
 +    for (cg = 0; cg < ncg; cg++)
 +    {
 +        while (cg >= cgi_mb[mb].cg_end)
 +        {
 +            mb++;
 +        }
 +        cginfo[cg] =
 +            cgi_mb[mb].cginfo[(cg - cgi_mb[mb].cg_start) % cgi_mb[mb].cg_mod];
 +    }
 +
 +    return cginfo;
 +}
 +
 +static void set_chargesum(FILE *log, t_forcerec *fr, const gmx_mtop_t *mtop)
 +{
 +    double         qsum, q2sum, q;
 +    int            mb, nmol, i;
 +    const t_atoms *atoms;
 +
 +    qsum  = 0;
 +    q2sum = 0;
 +    for (mb = 0; mb < mtop->nmolblock; mb++)
 +    {
 +        nmol  = mtop->molblock[mb].nmol;
 +        atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
 +        for (i = 0; i < atoms->nr; i++)
 +        {
 +            q      = atoms->atom[i].q;
 +            qsum  += nmol*q;
 +            q2sum += nmol*q*q;
 +        }
 +    }
 +    fr->qsum[0]  = qsum;
 +    fr->q2sum[0] = q2sum;
 +    if (fr->efep != efepNO)
 +    {
 +        qsum  = 0;
 +        q2sum = 0;
 +        for (mb = 0; mb < mtop->nmolblock; mb++)
 +        {
 +            nmol  = mtop->molblock[mb].nmol;
 +            atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
 +            for (i = 0; i < atoms->nr; i++)
 +            {
 +                q      = atoms->atom[i].qB;
 +                qsum  += nmol*q;
 +                q2sum += nmol*q*q;
 +            }
 +            fr->qsum[1]  = qsum;
 +            fr->q2sum[1] = q2sum;
 +        }
 +    }
 +    else
 +    {
 +        fr->qsum[1]  = fr->qsum[0];
 +        fr->q2sum[1] = fr->q2sum[0];
 +    }
 +    if (log)
 +    {
 +        if (fr->efep == efepNO)
 +        {
 +            fprintf(log, "System total charge: %.3f\n", fr->qsum[0]);
 +        }
 +        else
 +        {
 +            fprintf(log, "System total charge, top. A: %.3f top. B: %.3f\n",
 +                    fr->qsum[0], fr->qsum[1]);
 +        }
 +    }
 +}
 +
 +void update_forcerec(FILE *log, t_forcerec *fr, matrix box)
 +{
 +    if (fr->eeltype == eelGRF)
 +    {
 +        calc_rffac(NULL, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
 +                   fr->rcoulomb, fr->temp, fr->zsquare, box,
 +                   &fr->kappa, &fr->k_rf, &fr->c_rf);
 +    }
 +}
 +
 +void set_avcsixtwelve(FILE *fplog, t_forcerec *fr, const gmx_mtop_t *mtop)
 +{
 +    const t_atoms  *atoms, *atoms_tpi;
 +    const t_blocka *excl;
 +    int             mb, nmol, nmolc, i, j, tpi, tpj, j1, j2, k, n, nexcl, q;
 +#if (defined SIZEOF_LONG_LONG_INT) && (SIZEOF_LONG_LONG_INT >= 8)
 +    long long int   npair, npair_ij, tmpi, tmpj;
 +#else
 +    double          npair, npair_ij, tmpi, tmpj;
 +#endif
 +    double          csix, ctwelve;
 +    int             ntp, *typecount;
 +    gmx_bool        bBHAM;
 +    real           *nbfp;
 +
 +    ntp   = fr->ntype;
 +    bBHAM = fr->bBHAM;
 +    nbfp  = fr->nbfp;
 +
 +    for (q = 0; q < (fr->efep == efepNO ? 1 : 2); q++)
 +    {
 +        csix    = 0;
 +        ctwelve = 0;
 +        npair   = 0;
 +        nexcl   = 0;
 +        if (!fr->n_tpi)
 +        {
 +            /* Count the types so we avoid natoms^2 operations */
 +            snew(typecount, ntp);
 +            for (mb = 0; mb < mtop->nmolblock; mb++)
 +            {
 +                nmol  = mtop->molblock[mb].nmol;
 +                atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
 +                for (i = 0; i < atoms->nr; i++)
 +                {
 +                    if (q == 0)
 +                    {
 +                        tpi = atoms->atom[i].type;
 +                    }
 +                    else
 +                    {
 +                        tpi = atoms->atom[i].typeB;
 +                    }
 +                    typecount[tpi] += nmol;
 +                }
 +            }
 +            for (tpi = 0; tpi < ntp; tpi++)
 +            {
 +                for (tpj = tpi; tpj < ntp; tpj++)
 +                {
 +                    tmpi = typecount[tpi];
 +                    tmpj = typecount[tpj];
 +                    if (tpi != tpj)
 +                    {
 +                        npair_ij = tmpi*tmpj;
 +                    }
 +                    else
 +                    {
 +                        npair_ij = tmpi*(tmpi - 1)/2;
 +                    }
 +                    if (bBHAM)
 +                    {
 +                        /* nbfp now includes the 6.0 derivative prefactor */
 +                        csix    += npair_ij*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
 +                    }
 +                    else
 +                    {
 +                        /* nbfp now includes the 6.0/12.0 derivative prefactors */
 +                        csix    += npair_ij*   C6(nbfp, ntp, tpi, tpj)/6.0;
 +                        ctwelve += npair_ij*  C12(nbfp, ntp, tpi, tpj)/12.0;
 +                    }
 +                    npair += npair_ij;
 +                }
 +            }
 +            sfree(typecount);
 +            /* Subtract the excluded pairs.
 +             * The main reason for substracting exclusions is that in some cases
 +             * some combinations might never occur and the parameters could have
 +             * any value. These unused values should not influence the dispersion
 +             * correction.
 +             */
 +            for (mb = 0; mb < mtop->nmolblock; mb++)
 +            {
 +                nmol  = mtop->molblock[mb].nmol;
 +                atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
 +                excl  = &mtop->moltype[mtop->molblock[mb].type].excls;
 +                for (i = 0; (i < atoms->nr); i++)
 +                {
 +                    if (q == 0)
 +                    {
 +                        tpi = atoms->atom[i].type;
 +                    }
 +                    else
 +                    {
 +                        tpi = atoms->atom[i].typeB;
 +                    }
 +                    j1  = excl->index[i];
 +                    j2  = excl->index[i+1];
 +                    for (j = j1; j < j2; j++)
 +                    {
 +                        k = excl->a[j];
 +                        if (k > i)
 +                        {
 +                            if (q == 0)
 +                            {
 +                                tpj = atoms->atom[k].type;
 +                            }
 +                            else
 +                            {
 +                                tpj = atoms->atom[k].typeB;
 +                            }
 +                            if (bBHAM)
 +                            {
 +                                /* nbfp now includes the 6.0 derivative prefactor */
 +                                csix -= nmol*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
 +                            }
 +                            else
 +                            {
 +                                /* nbfp now includes the 6.0/12.0 derivative prefactors */
 +                                csix    -= nmol*C6 (nbfp, ntp, tpi, tpj)/6.0;
 +                                ctwelve -= nmol*C12(nbfp, ntp, tpi, tpj)/12.0;
 +                            }
 +                            nexcl += nmol;
 +                        }
 +                    }
 +                }
 +            }
 +        }
 +        else
 +        {
 +            /* Only correct for the interaction of the test particle
 +             * with the rest of the system.
 +             */
 +            atoms_tpi =
 +                &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].atoms;
 +
 +            npair = 0;
 +            for (mb = 0; mb < mtop->nmolblock; mb++)
 +            {
 +                nmol  = mtop->molblock[mb].nmol;
 +                atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
 +                for (j = 0; j < atoms->nr; j++)
 +                {
 +                    nmolc = nmol;
 +                    /* Remove the interaction of the test charge group
 +                     * with itself.
 +                     */
 +                    if (mb == mtop->nmolblock-1)
 +                    {
 +                        nmolc--;
 +
 +                        if (mb == 0 && nmol == 1)
 +                        {
 +                            gmx_fatal(FARGS, "Old format tpr with TPI, please generate a new tpr file");
 +                        }
 +                    }
 +                    if (q == 0)
 +                    {
 +                        tpj = atoms->atom[j].type;
 +                    }
 +                    else
 +                    {
 +                        tpj = atoms->atom[j].typeB;
 +                    }
 +                    for (i = 0; i < fr->n_tpi; i++)
 +                    {
 +                        if (q == 0)
 +                        {
 +                            tpi = atoms_tpi->atom[i].type;
 +                        }
 +                        else
 +                        {
 +                            tpi = atoms_tpi->atom[i].typeB;
 +                        }
 +                        if (bBHAM)
 +                        {
 +                            /* nbfp now includes the 6.0 derivative prefactor */
 +                            csix    += nmolc*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
 +                        }
 +                        else
 +                        {
 +                            /* nbfp now includes the 6.0/12.0 derivative prefactors */
 +                            csix    += nmolc*C6 (nbfp, ntp, tpi, tpj)/6.0;
 +                            ctwelve += nmolc*C12(nbfp, ntp, tpi, tpj)/12.0;
 +                        }
 +                        npair += nmolc;
 +                    }
 +                }
 +            }
 +        }
 +        if (npair - nexcl <= 0 && fplog)
 +        {
 +            fprintf(fplog, "\nWARNING: There are no atom pairs for dispersion correction\n\n");
 +            csix     = 0;
 +            ctwelve  = 0;
 +        }
 +        else
 +        {
 +            csix    /= npair - nexcl;
 +            ctwelve /= npair - nexcl;
 +        }
 +        if (debug)
 +        {
 +            fprintf(debug, "Counted %d exclusions\n", nexcl);
 +            fprintf(debug, "Average C6 parameter is: %10g\n", (double)csix);
 +            fprintf(debug, "Average C12 parameter is: %10g\n", (double)ctwelve);
 +        }
 +        fr->avcsix[q]    = csix;
 +        fr->avctwelve[q] = ctwelve;
 +    }
 +    if (fplog != NULL)
 +    {
 +        if (fr->eDispCorr == edispcAllEner ||
 +            fr->eDispCorr == edispcAllEnerPres)
 +        {
 +            fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e, <C12> %10.4e\n",
 +                    fr->avcsix[0], fr->avctwelve[0]);
 +        }
 +        else
 +        {
 +            fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e\n", fr->avcsix[0]);
 +        }
 +    }
 +}
 +
 +
 +static void set_bham_b_max(FILE *fplog, t_forcerec *fr,
 +                           const gmx_mtop_t *mtop)
 +{
 +    const t_atoms *at1, *at2;
 +    int            mt1, mt2, i, j, tpi, tpj, ntypes;
 +    real           b, bmin;
 +    real          *nbfp;
 +
 +    if (fplog)
 +    {
 +        fprintf(fplog, "Determining largest Buckingham b parameter for table\n");
 +    }
 +    nbfp   = fr->nbfp;
 +    ntypes = fr->ntype;
 +
 +    bmin           = -1;
 +    fr->bham_b_max = 0;
 +    for (mt1 = 0; mt1 < mtop->nmoltype; mt1++)
 +    {
 +        at1 = &mtop->moltype[mt1].atoms;
 +        for (i = 0; (i < at1->nr); i++)
 +        {
 +            tpi = at1->atom[i].type;
 +            if (tpi >= ntypes)
 +            {
 +                gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", i, tpi, ntypes);
 +            }
 +
 +            for (mt2 = mt1; mt2 < mtop->nmoltype; mt2++)
 +            {
 +                at2 = &mtop->moltype[mt2].atoms;
 +                for (j = 0; (j < at2->nr); j++)
 +                {
 +                    tpj = at2->atom[j].type;
 +                    if (tpj >= ntypes)
 +                    {
 +                        gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", j, tpj, ntypes);
 +                    }
 +                    b = BHAMB(nbfp, ntypes, tpi, tpj);
 +                    if (b > fr->bham_b_max)
 +                    {
 +                        fr->bham_b_max = b;
 +                    }
 +                    if ((b < bmin) || (bmin == -1))
 +                    {
 +                        bmin = b;
 +                    }
 +                }
 +            }
 +        }
 +    }
 +    if (fplog)
 +    {
 +        fprintf(fplog, "Buckingham b parameters, min: %g, max: %g\n",
 +                bmin, fr->bham_b_max);
 +    }
 +}
 +
 +static void make_nbf_tables(FILE *fp, const output_env_t oenv,
 +                            t_forcerec *fr, real rtab,
 +                            const t_commrec *cr,
 +                            const char *tabfn, char *eg1, char *eg2,
 +                            t_nblists *nbl)
 +{
 +    char buf[STRLEN];
 +    int  i, j;
 +
 +    if (tabfn == NULL)
 +    {
 +        if (debug)
 +        {
 +            fprintf(debug, "No table file name passed, can not read table, can not do non-bonded interactions\n");
 +        }
 +        return;
 +    }
 +
 +    sprintf(buf, "%s", tabfn);
 +    if (eg1 && eg2)
 +    {
 +        /* Append the two energy group names */
 +        sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "_%s_%s.%s",
 +                eg1, eg2, ftp2ext(efXVG));
 +    }
 +    nbl->table_elec_vdw = make_tables(fp, oenv, fr, MASTER(cr), buf, rtab, 0);
 +    /* Copy the contents of the table to separate coulomb and LJ tables too,
 +     * to improve cache performance.
 +     */
 +    /* For performance reasons we want
 +     * the table data to be aligned to 16-byte. The pointers could be freed
 +     * but currently aren't.
 +     */
 +    nbl->table_elec.interaction   = GMX_TABLE_INTERACTION_ELEC;
 +    nbl->table_elec.format        = nbl->table_elec_vdw.format;
 +    nbl->table_elec.r             = nbl->table_elec_vdw.r;
 +    nbl->table_elec.n             = nbl->table_elec_vdw.n;
 +    nbl->table_elec.scale         = nbl->table_elec_vdw.scale;
 +    nbl->table_elec.scale_exp     = nbl->table_elec_vdw.scale_exp;
 +    nbl->table_elec.formatsize    = nbl->table_elec_vdw.formatsize;
 +    nbl->table_elec.ninteractions = 1;
 +    nbl->table_elec.stride        = nbl->table_elec.formatsize * nbl->table_elec.ninteractions;
 +    snew_aligned(nbl->table_elec.data, nbl->table_elec.stride*(nbl->table_elec.n+1), 32);
 +
 +    nbl->table_vdw.interaction   = GMX_TABLE_INTERACTION_VDWREP_VDWDISP;
 +    nbl->table_vdw.format        = nbl->table_elec_vdw.format;
 +    nbl->table_vdw.r             = nbl->table_elec_vdw.r;
 +    nbl->table_vdw.n             = nbl->table_elec_vdw.n;
 +    nbl->table_vdw.scale         = nbl->table_elec_vdw.scale;
 +    nbl->table_vdw.scale_exp     = nbl->table_elec_vdw.scale_exp;
 +    nbl->table_vdw.formatsize    = nbl->table_elec_vdw.formatsize;
 +    nbl->table_vdw.ninteractions = 2;
 +    nbl->table_vdw.stride        = nbl->table_vdw.formatsize * nbl->table_vdw.ninteractions;
 +    snew_aligned(nbl->table_vdw.data, nbl->table_vdw.stride*(nbl->table_vdw.n+1), 32);
 +
 +    for (i = 0; i <= nbl->table_elec_vdw.n; i++)
 +    {
 +        for (j = 0; j < 4; j++)
 +        {
 +            nbl->table_elec.data[4*i+j] = nbl->table_elec_vdw.data[12*i+j];
 +        }
 +        for (j = 0; j < 8; j++)
 +        {
 +            nbl->table_vdw.data[8*i+j] = nbl->table_elec_vdw.data[12*i+4+j];
 +        }
 +    }
 +}
 +
 +static void count_tables(int ftype1, int ftype2, const gmx_mtop_t *mtop,
 +                         int *ncount, int **count)
 +{
 +    const gmx_moltype_t *molt;
 +    const t_ilist       *il;
 +    int                  mt, ftype, stride, i, j, tabnr;
 +
 +    for (mt = 0; mt < mtop->nmoltype; mt++)
 +    {
 +        molt = &mtop->moltype[mt];
 +        for (ftype = 0; ftype < F_NRE; ftype++)
 +        {
 +            if (ftype == ftype1 || ftype == ftype2)
 +            {
 +                il     = &molt->ilist[ftype];
 +                stride = 1 + NRAL(ftype);
 +                for (i = 0; i < il->nr; i += stride)
 +                {
 +                    tabnr = mtop->ffparams.iparams[il->iatoms[i]].tab.table;
 +                    if (tabnr < 0)
 +                    {
 +                        gmx_fatal(FARGS, "A bonded table number is smaller than 0: %d\n", tabnr);
 +                    }
 +                    if (tabnr >= *ncount)
 +                    {
 +                        srenew(*count, tabnr+1);
 +                        for (j = *ncount; j < tabnr+1; j++)
 +                        {
 +                            (*count)[j] = 0;
 +                        }
 +                        *ncount = tabnr+1;
 +                    }
 +                    (*count)[tabnr]++;
 +                }
 +            }
 +        }
 +    }
 +}
 +
 +static bondedtable_t *make_bonded_tables(FILE *fplog,
 +                                         int ftype1, int ftype2,
 +                                         const gmx_mtop_t *mtop,
 +                                         const char *basefn, const char *tabext)
 +{
 +    int            i, ncount, *count;
 +    char           tabfn[STRLEN];
 +    bondedtable_t *tab;
 +
 +    tab = NULL;
 +
 +    ncount = 0;
 +    count  = NULL;
 +    count_tables(ftype1, ftype2, mtop, &ncount, &count);
 +
 +    if (ncount > 0)
 +    {
 +        snew(tab, ncount);
 +        for (i = 0; i < ncount; i++)
 +        {
 +            if (count[i] > 0)
 +            {
 +                sprintf(tabfn, "%s", basefn);
 +                sprintf(tabfn + strlen(basefn) - strlen(ftp2ext(efXVG)) - 1, "_%s%d.%s",
 +                        tabext, i, ftp2ext(efXVG));
 +                tab[i] = make_bonded_table(fplog, tabfn, NRAL(ftype1)-2);
 +            }
 +        }
 +        sfree(count);
 +    }
 +
 +    return tab;
 +}
 +
 +void forcerec_set_ranges(t_forcerec *fr,
 +                         int ncg_home, int ncg_force,
 +                         int natoms_force,
 +                         int natoms_force_constr, int natoms_f_novirsum)
 +{
 +    fr->cg0 = 0;
 +    fr->hcg = ncg_home;
 +
 +    /* fr->ncg_force is unused in the standard code,
 +     * but it can be useful for modified code dealing with charge groups.
 +     */
 +    fr->ncg_force           = ncg_force;
 +    fr->natoms_force        = natoms_force;
 +    fr->natoms_force_constr = natoms_force_constr;
 +
 +    if (fr->natoms_force_constr > fr->nalloc_force)
 +    {
 +        fr->nalloc_force = over_alloc_dd(fr->natoms_force_constr);
 +
 +        if (fr->bTwinRange)
 +        {
 +            srenew(fr->f_twin, fr->nalloc_force);
 +        }
 +    }
 +
 +    if (fr->bF_NoVirSum)
 +    {
 +        fr->f_novirsum_n = natoms_f_novirsum;
 +        if (fr->f_novirsum_n > fr->f_novirsum_nalloc)
 +        {
 +            fr->f_novirsum_nalloc = over_alloc_dd(fr->f_novirsum_n);
 +            srenew(fr->f_novirsum_alloc, fr->f_novirsum_nalloc);
 +        }
 +    }
 +    else
 +    {
 +        fr->f_novirsum_n = 0;
 +    }
 +}
 +
 +static real cutoff_inf(real cutoff)
 +{
 +    if (cutoff == 0)
 +    {
 +        cutoff = GMX_CUTOFF_INF;
 +    }
 +
 +    return cutoff;
 +}
 +
 +static void make_adress_tf_tables(FILE *fp, const output_env_t oenv,
 +                                  t_forcerec *fr, const t_inputrec *ir,
 +                                  const char *tabfn, const gmx_mtop_t *mtop,
 +                                  matrix     box)
 +{
 +    char buf[STRLEN];
 +    int  i, j;
 +
 +    if (tabfn == NULL)
 +    {
 +        gmx_fatal(FARGS, "No thermoforce table file given. Use -tabletf to specify a file\n");
 +        return;
 +    }
 +
 +    snew(fr->atf_tabs, ir->adress->n_tf_grps);
 +
++    sprintf(buf, "%s", tabfn);
 +    for (i = 0; i < ir->adress->n_tf_grps; i++)
 +    {
 +        j = ir->adress->tf_table_index[i]; /* get energy group index */
 +        sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "tf_%s.%s",
 +                *(mtop->groups.grpname[mtop->groups.grps[egcENER].nm_ind[j]]), ftp2ext(efXVG));
++        if(fp)
++        {
++           fprintf(fp,"loading tf table for energygrp index %d from %s\n", ir->adress->tf_table_index[i], buf);
++        }
 +        fr->atf_tabs[i] = make_atf_table(fp, oenv, fr, buf, box);
 +    }
 +
 +}
 +
 +gmx_bool can_use_allvsall(const t_inputrec *ir, const gmx_mtop_t *mtop,
 +                          gmx_bool bPrintNote, t_commrec *cr, FILE *fp)
 +{
 +    gmx_bool bAllvsAll;
 +
 +    bAllvsAll =
 +        (
 +            ir->rlist == 0            &&
 +            ir->rcoulomb == 0         &&
 +            ir->rvdw == 0             &&
 +            ir->ePBC == epbcNONE      &&
 +            ir->vdwtype == evdwCUT    &&
 +            ir->coulombtype == eelCUT &&
 +            ir->efep == efepNO        &&
 +            (ir->implicit_solvent == eisNO ||
 +             (ir->implicit_solvent == eisGBSA && (ir->gb_algorithm == egbSTILL ||
 +                                                  ir->gb_algorithm == egbHCT   ||
 +                                                  ir->gb_algorithm == egbOBC))) &&
 +            getenv("GMX_NO_ALLVSALL") == NULL
 +        );
 +
 +    if (bAllvsAll && ir->opts.ngener > 1)
 +    {
 +        const char *note = "NOTE: Can not use all-vs-all force loops, because there are multiple energy monitor groups; you might get significantly higher performance when using only a single energy monitor group.\n";
 +
 +        if (bPrintNote)
 +        {
 +            if (MASTER(cr))
 +            {
 +                fprintf(stderr, "\n%s\n", note);
 +            }
 +            if (fp != NULL)
 +            {
 +                fprintf(fp, "\n%s\n", note);
 +            }
 +        }
 +        bAllvsAll = FALSE;
 +    }
 +
 +    if (bAllvsAll && fp && MASTER(cr))
 +    {
 +        fprintf(fp, "\nUsing accelerated all-vs-all kernels.\n\n");
 +    }
 +
 +    return bAllvsAll;
 +}
 +
 +
 +static void init_forcerec_f_threads(t_forcerec *fr, int nenergrp)
 +{
 +    int t, i;
 +
 +    /* These thread local data structures are used for bondeds only */
 +    fr->nthreads = gmx_omp_nthreads_get(emntBonded);
 +
 +    if (fr->nthreads > 1)
 +    {
 +        snew(fr->f_t, fr->nthreads);
 +        /* Thread 0 uses the global force and energy arrays */
 +        for (t = 1; t < fr->nthreads; t++)
 +        {
 +            fr->f_t[t].f        = NULL;
 +            fr->f_t[t].f_nalloc = 0;
 +            snew(fr->f_t[t].fshift, SHIFTS);
 +            fr->f_t[t].grpp.nener = nenergrp*nenergrp;
 +            for (i = 0; i < egNR; i++)
 +            {
 +                snew(fr->f_t[t].grpp.ener[i], fr->f_t[t].grpp.nener);
 +            }
 +        }
 +    }
 +}
 +
 +
 +static void pick_nbnxn_kernel_cpu(FILE             *fp,
 +                                  const t_commrec  *cr,
 +                                  const gmx_cpuid_t cpuid_info,
 +                                  const t_inputrec *ir,
 +                                  int              *kernel_type,
 +                                  int              *ewald_excl)
 +{   /* Choose the CPU nbnxn kernel type and the Ewald exclusion treatment.
 +     *
 +     * The defaults are the plain-C 4x4 kernel with tabulated Ewald
 +     * exclusion correction. When GMX_NBNXN_SIMD is defined, a SIMD kernel
 +     * is selected from the compile-time options and can be overridden with
 +     * the environment variables tested below; the later test wins when
 +     * several of them are set.
 +     *
 +     * fp, cr and cpuid_info are not used in this function; ir is only read
 +     * in the GMX_NBNXN_SIMD_4XN && GMX_X86_AVX_256 branch.
 +     *
 +     * kernel_type  output: one of the nbnxnk* values
 +     * ewald_excl   output: ewaldexclTable or ewaldexclAnalytical
 +     */
 +    *kernel_type = nbnxnk4x4_PlainC;
 +    *ewald_excl  = ewaldexclTable;
 +
 +#ifdef GMX_NBNXN_SIMD
 +    {
 +#ifdef GMX_NBNXN_SIMD_4XN
 +        *kernel_type = nbnxnk4xN_SIMD_4xN;
 +#endif
 +#ifdef GMX_NBNXN_SIMD_2XNN
 +        /* We expect the 2xNN kernels to be faster in most cases */
 +        *kernel_type = nbnxnk4xN_SIMD_2xNN;
 +#endif
 +
 +#if defined GMX_NBNXN_SIMD_4XN && defined GMX_X86_AVX_256
 +        if (EEL_RF(ir->coulombtype) || ir->coulombtype == eelCUT)
 +        {
 +            /* The raw pair rate of the 4x8 kernel is higher than 2x(4+4),
 +             * 10% with HT, 50% without HT, but extra zeros interactions
 +             * can compensate. As we currently don't detect the actual use
 +             * of HT, switch to 4x8 to avoid a potential performance hit.
 +             */
 +            *kernel_type = nbnxnk4xN_SIMD_4xN;
 +        }
 +#endif
 +        if (getenv("GMX_NBNXN_SIMD_4XN") != NULL)
 +        {
 +#ifdef GMX_NBNXN_SIMD_4XN
 +            *kernel_type = nbnxnk4xN_SIMD_4xN;
 +#else
 +            gmx_fatal(FARGS, "SIMD 4xN kernels requested, but Gromacs has been compiled without support for these kernels");
 +#endif
 +        }
 +        if (getenv("GMX_NBNXN_SIMD_2XNN") != NULL)
 +        {
 +#ifdef GMX_NBNXN_SIMD_2XNN
 +            *kernel_type = nbnxnk4xN_SIMD_2xNN;
 +#else
 +            gmx_fatal(FARGS, "SIMD 2x(N+N) kernels requested, but Gromacs has been compiled without support for these kernels");
 +#endif
 +        }
 +
 +        /* Analytical Ewald exclusion correction is only an option in the
 +         * x86 SIMD kernel. This is faster in single precision
 +         * on Bulldozer and slightly faster on Sandy Bridge.
 +         * The environment variables below override the compile-time
 +         * choice; GMX_NBNXN_EWALD_ANALYTICAL is tested last.
 +         */
 +#if (defined GMX_X86_AVX_128_FMA || defined GMX_X86_AVX_256) && !defined GMX_DOUBLE
 +        *ewald_excl = ewaldexclAnalytical;
 +#endif
 +        if (getenv("GMX_NBNXN_EWALD_TABLE") != NULL)
 +        {
 +            *ewald_excl = ewaldexclTable;
 +        }
 +        if (getenv("GMX_NBNXN_EWALD_ANALYTICAL") != NULL)
 +        {
 +            *ewald_excl = ewaldexclAnalytical;
 +        }
 +
 +    }
 +#endif /* GMX_NBNXN_SIMD */
 +}
 +
 +
 +const char *lookup_nbnxn_kernel_name(int kernel_type)
 +{
 +    const char *returnvalue = NULL;
 +    switch (kernel_type)
 +    {
 +        case nbnxnkNotSet: returnvalue     = "not set"; break;
 +        case nbnxnk4x4_PlainC: returnvalue = "plain C"; break;
 +#ifndef GMX_NBNXN_SIMD
 +        case nbnxnk4xN_SIMD_4xN: returnvalue  = "not available"; break;
 +        case nbnxnk4xN_SIMD_2xNN: returnvalue = "not available"; break;
 +#else
 +#ifdef GMX_X86_SSE2
 +#if GMX_NBNXN_SIMD_BITWIDTH == 128
 +            /* x86 SIMD intrinsics can be converted to either SSE or AVX depending
 +             * on compiler flags. As we use nearly identical intrinsics, using an AVX
 +             * compiler flag without an AVX macro effectively results in AVX kernels.
 +             * For gcc we check for __AVX__
 +             * At least a check for icc should be added (if there is a macro)
 +             */
 +#if !(defined GMX_X86_AVX_128_FMA || defined __AVX__)
 +#ifndef GMX_X86_SSE4_1
 +        case nbnxnk4xN_SIMD_4xN: returnvalue  = "SSE2"; break;
 +        case nbnxnk4xN_SIMD_2xNN: returnvalue = "SSE2"; break;
 +#else
 +        case nbnxnk4xN_SIMD_4xN: returnvalue  = "SSE4.1"; break;
 +        case nbnxnk4xN_SIMD_2xNN: returnvalue = "SSE4.1"; break;
 +#endif
 +#else
 +        case nbnxnk4xN_SIMD_4xN: returnvalue  = "AVX-128"; break;
 +        case nbnxnk4xN_SIMD_2xNN: returnvalue = "AVX-128"; break;
 +#endif
 +#endif
 +#if GMX_NBNXN_SIMD_BITWIDTH == 256
 +        case nbnxnk4xN_SIMD_4xN: returnvalue  = "AVX-256"; break;
 +        case nbnxnk4xN_SIMD_2xNN: returnvalue = "AVX-256"; break;
 +#endif
 +#else   /* not GMX_X86_SSE2 */
 +        case nbnxnk4xN_SIMD_4xN: returnvalue  = "SIMD"; break;
 +        case nbnxnk4xN_SIMD_2xNN: returnvalue = "SIMD"; break;
 +#endif
 +#endif
 +        case nbnxnk8x8x8_CUDA: returnvalue   = "CUDA"; break;
 +        case nbnxnk8x8x8_PlainC: returnvalue = "plain C"; break;
 +
 +        case nbnxnkNR:
 +        default:
 +            gmx_fatal(FARGS, "Illegal kernel type selected");
 +            returnvalue = NULL;
 +            break;
 +    }
 +    return returnvalue;
 +};
 +
 +static void pick_nbnxn_kernel(FILE                *fp,
 +                              const t_commrec     *cr,
 +                              const gmx_hw_info_t *hwinfo,
 +                              gmx_bool             use_cpu_acceleration,
 +                              gmx_bool             bUseGPU,
 +                              gmx_bool             bEmulateGPU,
 +                              const t_inputrec    *ir,
 +                              int                 *kernel_type,
 +                              int                 *ewald_excl,
 +                              gmx_bool             bDoNonbonded)
 +{
 +    assert(kernel_type);
 +
 +    *kernel_type = nbnxnkNotSet;
 +    *ewald_excl  = ewaldexclTable;
 +
 +    if (bEmulateGPU)
 +    {
 +        *kernel_type = nbnxnk8x8x8_PlainC;
 +
 +        if (bDoNonbonded)
 +        {
 +            md_print_warn(cr, fp, "Emulating a GPU run on the CPU (slow)");
 +        }
 +    }
 +    else if (bUseGPU)
 +    {
 +        *kernel_type = nbnxnk8x8x8_CUDA;
 +    }
 +
 +    if (*kernel_type == nbnxnkNotSet)
 +    {
 +        if (use_cpu_acceleration)
 +        {
 +            pick_nbnxn_kernel_cpu(fp, cr, hwinfo->cpuid_info, ir,
 +                                  kernel_type, ewald_excl);
 +        }
 +        else
 +        {
 +            *kernel_type = nbnxnk4x4_PlainC;
 +        }
 +    }
 +
 +    if (bDoNonbonded && fp != NULL)
 +    {
 +        fprintf(fp, "\nUsing %s %dx%d non-bonded kernels\n\n",
 +                lookup_nbnxn_kernel_name(*kernel_type),
 +                nbnxn_kernel_pairlist_simple(*kernel_type) ? NBNXN_CPU_CLUSTER_I_SIZE : NBNXN_GPU_CLUSTER_SIZE,
 +                nbnxn_kernel_to_cj_size(*kernel_type));
 +    }
 +}
 +
 +static void pick_nbnxn_resources(FILE                *fp,
 +                                 const t_commrec     *cr,
 +                                 const gmx_hw_info_t *hwinfo,
 +                                 gmx_bool             bDoNonbonded,
 +                                 gmx_bool            *bUseGPU,
 +                                 gmx_bool            *bEmulateGPU)
 +{
 +    gmx_bool bEmulateGPUEnvVarSet;
 +    char     gpu_err_str[STRLEN];
 +
 +    *bUseGPU = FALSE;
 +
 +    bEmulateGPUEnvVarSet = (getenv("GMX_EMULATE_GPU") != NULL);
 +
 +    /* Run GPU emulation mode if GMX_EMULATE_GPU is defined. Because
 +     * GPUs (currently) only handle non-bonded calculations, we will
 +     * automatically switch to emulation if non-bonded calculations are
 +     * turned off via GMX_NO_NONBONDED - this is the simple and elegant
 +     * way to turn off GPU initialization, data movement, and cleanup.
 +     *
 +     * GPU emulation can be useful to assess the performance one can expect by
 +     * adding GPU(s) to the machine. The conditional below allows this even
 +     * if mdrun is compiled without GPU acceleration support.
 +     * Note that you should freezing the system as otherwise it will explode.
 +     */
 +    *bEmulateGPU = (bEmulateGPUEnvVarSet ||
 +                    (!bDoNonbonded && hwinfo->bCanUseGPU));
 +
 +    /* Enable GPU mode when GPUs are available or no GPU emulation is requested.
 +     */
 +    if (hwinfo->bCanUseGPU && !(*bEmulateGPU))
 +    {
 +        /* Each PP node will use the intra-node id-th device from the
 +         * list of detected/selected GPUs. */
 +        if (!init_gpu(cr->rank_pp_intranode, gpu_err_str, &hwinfo->gpu_info))
 +        {
 +            /* At this point the init should never fail as we made sure that
 +             * we have all the GPUs we need. If it still does, we'll bail. */
 +            gmx_fatal(FARGS, "On node %d failed to initialize GPU #%d: %s",
 +                      cr->nodeid,
 +                      get_gpu_device_id(&hwinfo->gpu_info, cr->rank_pp_intranode),
 +                      gpu_err_str);
 +        }
 +
 +        /* Here we actually turn on hardware GPU acceleration */
 +        *bUseGPU = TRUE;
 +    }
 +}
 +
 +gmx_bool uses_simple_tables(int                 cutoff_scheme,
 +                            nonbonded_verlet_t *nbv,
 +                            int                 group)
 +{
 +    gmx_bool bUsesSimpleTables = TRUE;
 +    int      grp_index;
 +
 +    switch (cutoff_scheme)
 +    {
 +        case ecutsGROUP:
 +            bUsesSimpleTables = TRUE;
 +            break;
 +        case ecutsVERLET:
 +            assert(NULL != nbv && NULL != nbv->grp);
 +            grp_index         = (group < 0) ? 0 : (nbv->ngrp - 1);
 +            bUsesSimpleTables = nbnxn_kernel_pairlist_simple(nbv->grp[grp_index].kernel_type);
 +            break;
 +        default:
 +            gmx_incons("unimplemented");
 +    }
 +    return bUsesSimpleTables;
 +}
 +
 +static void init_ewald_f_table(interaction_const_t *ic,
 +                               gmx_bool             bUsesSimpleTables,
 +                               real                 rtab)
 +{
 +    real maxr;
 +
 +    if (bUsesSimpleTables)
 +    {
 +        /* With a spacing of 0.0005 we are at the force summation accuracy
 +         * for the SSE kernels for "normal" atomistic simulations.
 +         */
 +        ic->tabq_scale = ewald_spline3_table_scale(ic->ewaldcoeff,
 +                                                   ic->rcoulomb);
 +
 +        maxr           = (rtab > ic->rcoulomb) ? rtab : ic->rcoulomb;
 +        ic->tabq_size  = (int)(maxr*ic->tabq_scale) + 2;
 +    }
 +    else
 +    {
 +        ic->tabq_size = GPU_EWALD_COULOMB_FORCE_TABLE_SIZE;
 +        /* Subtract 2 iso 1 to avoid access out of range due to rounding */
 +        ic->tabq_scale = (ic->tabq_size - 2)/ic->rcoulomb;
 +    }
 +
 +    sfree_aligned(ic->tabq_coul_FDV0);
 +    sfree_aligned(ic->tabq_coul_F);
 +    sfree_aligned(ic->tabq_coul_V);
 +
 +    /* Create the original table data in FDV0 */
 +    snew_aligned(ic->tabq_coul_FDV0, ic->tabq_size*4, 32);
 +    snew_aligned(ic->tabq_coul_F, ic->tabq_size, 32);
 +    snew_aligned(ic->tabq_coul_V, ic->tabq_size, 32);
 +    table_spline3_fill_ewald_lr(ic->tabq_coul_F, ic->tabq_coul_V, ic->tabq_coul_FDV0,
 +                                ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff);
 +}
 +
 +void init_interaction_const_tables(FILE                *fp,
 +                                   interaction_const_t *ic,
 +                                   gmx_bool             bUsesSimpleTables,
 +                                   real                 rtab)
 +{
 +    real spacing;
 +
 +    if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype))
 +    {
 +        init_ewald_f_table(ic, bUsesSimpleTables, rtab);
 +
 +        if (fp != NULL)
 +        {
 +            fprintf(fp, "Initialized non-bonded Ewald correction tables, spacing: %.2e size: %d\n\n",
 +                    1/ic->tabq_scale, ic->tabq_size);
 +        }
 +    }
 +}
 +
 +void init_interaction_const(FILE                 *fp,
 +                            interaction_const_t **interaction_const,
 +                            const t_forcerec     *fr,
 +                            real                  rtab)
 +{
 +    interaction_const_t *ic;
 +    gmx_bool             bUsesSimpleTables = TRUE;
 +
 +    snew(ic, 1);
 +
 +    /* Just allocate something so we can free it */
 +    snew_aligned(ic->tabq_coul_FDV0, 16, 32);
 +    snew_aligned(ic->tabq_coul_F, 16, 32);
 +    snew_aligned(ic->tabq_coul_V, 16, 32);
 +
 +    ic->rlist       = fr->rlist;
 +    ic->rlistlong   = fr->rlistlong;
 +
 +    /* Lennard-Jones */
 +    ic->rvdw        = fr->rvdw;
 +    if (fr->vdw_modifier == eintmodPOTSHIFT)
 +    {
 +        ic->sh_invrc6 = pow(ic->rvdw, -6.0);
 +    }
 +    else
 +    {
 +        ic->sh_invrc6 = 0;
 +    }
 +
 +    /* Electrostatics */
 +    ic->eeltype     = fr->eeltype;
 +    ic->rcoulomb    = fr->rcoulomb;
 +    ic->epsilon_r   = fr->epsilon_r;
 +    ic->epsfac      = fr->epsfac;
 +
 +    /* Ewald */
 +    ic->ewaldcoeff  = fr->ewaldcoeff;
 +    if (fr->coulomb_modifier == eintmodPOTSHIFT)
 +    {
 +        ic->sh_ewald = gmx_erfc(ic->ewaldcoeff*ic->rcoulomb);
 +    }
 +    else
 +    {
 +        ic->sh_ewald = 0;
 +    }
 +
 +    /* Reaction-field */
 +    if (EEL_RF(ic->eeltype))
 +    {
 +        ic->epsilon_rf = fr->epsilon_rf;
 +        ic->k_rf       = fr->k_rf;
 +        ic->c_rf       = fr->c_rf;
 +    }
 +    else
 +    {
 +        /* For plain cut-off we might use the reaction-field kernels */
 +        ic->epsilon_rf = ic->epsilon_r;
 +        ic->k_rf       = 0;
 +        if (fr->coulomb_modifier == eintmodPOTSHIFT)
 +        {
 +            ic->c_rf   = 1/ic->rcoulomb;
 +        }
 +        else
 +        {
 +            ic->c_rf   = 0;
 +        }
 +    }
 +
 +    if (fp != NULL)
 +    {
 +        fprintf(fp, "Potential shift: LJ r^-12: %.3f r^-6 %.3f",
 +                sqr(ic->sh_invrc6), ic->sh_invrc6);
 +        if (ic->eeltype == eelCUT)
 +        {
 +            fprintf(fp, ", Coulomb %.3f", ic->c_rf);
 +        }
 +        else if (EEL_PME(ic->eeltype))
 +        {
 +            fprintf(fp, ", Ewald %.3e", ic->sh_ewald);
 +        }
 +        fprintf(fp, "\n");
 +    }
 +
 +    *interaction_const = ic;
 +
 +    if (fr->nbv != NULL && fr->nbv->bUseGPU)
 +    {
 +        nbnxn_cuda_init_const(fr->nbv->cu_nbv, ic, fr->nbv);
 +    }
 +
 +    bUsesSimpleTables = uses_simple_tables(fr->cutoff_scheme, fr->nbv, -1);
 +    init_interaction_const_tables(fp, ic, bUsesSimpleTables, rtab);
 +}
 +
 +static void init_nb_verlet(FILE                *fp,
 +                           nonbonded_verlet_t **nb_verlet,
 +                           const t_inputrec    *ir,
 +                           const t_forcerec    *fr,
 +                           const t_commrec     *cr,
 +                           const char          *nbpu_opt)
 +{   /* Allocate and set up the Verlet-scheme non-bonded data structure.
 +     *
 +     * Steps, in order: pick GPU/emulation resources, select a kernel type
 +     * and Ewald exclusion treatment per interaction group, initialize the
 +     * CUDA data and the pair-list balancing parameter when a GPU is used,
 +     * return the structure through *nb_verlet, and finally initialize the
 +     * pair search, the pair-list sets and the atom data of each group.
 +     *
 +     * nbpu_opt may be NULL; the value "gpu_cpu" selects a separately picked
 +     * CPU kernel for the non-local group.
 +     */
 +    nonbonded_verlet_t *nbv;
 +    int                 i;
 +    char               *env;
 +    gmx_bool            bEmulateGPU, bHybridGPURun = FALSE;
 +
 +    nbnxn_alloc_t      *nb_alloc;
 +    nbnxn_free_t       *nb_free;
 +
 +    snew(nbv, 1);
 +
 +    pick_nbnxn_resources(fp, cr, fr->hwinfo,
 +                         fr->bNonbonded,
 +                         &nbv->bUseGPU,
 +                         &bEmulateGPU);
 +
 +    nbv->nbs = NULL;
 +
 +    nbv->ngrp = (DOMAINDECOMP(cr) ? 2 : 1); /* two groups (local and non-local) with domain decomposition, else one */
 +    for (i = 0; i < nbv->ngrp; i++)
 +    {
 +        nbv->grp[i].nbl_lists.nnbl = 0;
 +        nbv->grp[i].nbat           = NULL;
 +        nbv->grp[i].kernel_type    = nbnxnkNotSet;
 +
 +        if (i == 0) /* local */
 +        {
 +            pick_nbnxn_kernel(fp, cr, fr->hwinfo, fr->use_cpu_acceleration,
 +                              nbv->bUseGPU, bEmulateGPU,
 +                              ir,
 +                              &nbv->grp[i].kernel_type,
 +                              &nbv->grp[i].ewald_excl,
 +                              fr->bNonbonded);
 +        }
 +        else /* non-local */
 +        {
 +            if (nbpu_opt != NULL && strcmp(nbpu_opt, "gpu_cpu") == 0)
 +            {
 +                /* Use GPU for local, select a CPU kernel for non-local:
 +                 * both the GPU and the emulation flag are passed as FALSE.
 +                 */
 +                pick_nbnxn_kernel(fp, cr, fr->hwinfo, fr->use_cpu_acceleration,
 +                                  FALSE, FALSE,
 +                                  ir,
 +                                  &nbv->grp[i].kernel_type,
 +                                  &nbv->grp[i].ewald_excl,
 +                                  fr->bNonbonded);
 +
 +                bHybridGPURun = TRUE;
 +            }
 +            else
 +            {
 +                /* Use the same kernel for local and non-local interactions */
 +                nbv->grp[i].kernel_type = nbv->grp[0].kernel_type;
 +                nbv->grp[i].ewald_excl  = nbv->grp[0].ewald_excl;
 +            }
 +        }
 +    }
 +
 +    if (nbv->bUseGPU)
 +    {
 +        /* init the NxN GPU data; the last argument tells whether we'll have
 +         * both local and non-local NB calculation on GPU */
 +        nbnxn_cuda_init(fp, &nbv->cu_nbv,
 +                        &fr->hwinfo->gpu_info, cr->rank_pp_intranode,
 +                        (nbv->ngrp > 1) && !bHybridGPURun);
 +
 +        if ((env = getenv("GMX_NB_MIN_CI")) != NULL) /* user-supplied pair-list balancing parameter */
 +        {
 +            char *end;
 +
 +            nbv->min_ci_balanced = strtol(env, &end, 10);
 +            if (!end || (*end != 0) || nbv->min_ci_balanced <= 0) /* reject trailing characters and non-positive values */
 +            {
 +                gmx_fatal(FARGS, "Invalid value passed in GMX_NB_MIN_CI=%s, positive integer required", env);
 +            }
 +
 +            if (debug)
 +            {
 +                fprintf(debug, "Neighbor-list balancing parameter: %d (passed as env. var.)\n",
 +                        nbv->min_ci_balanced);
 +            }
 +        }
 +        else
 +        {
 +            nbv->min_ci_balanced = nbnxn_cuda_min_ci_balanced(nbv->cu_nbv);
 +            if (debug)
 +            {
 +                fprintf(debug, "Neighbor-list balancing parameter: %d (auto-adjusted to the number of GPU multi-processors)\n",
 +                        nbv->min_ci_balanced);
 +            }
 +        }
 +    }
 +    else
 +    {
 +        nbv->min_ci_balanced = 0;
 +    }
 +
 +    *nb_verlet = nbv; /* hand the structure to the caller before the remaining set-up below */
 +
 +    nbnxn_init_search(&nbv->nbs,
 +                      DOMAINDECOMP(cr) ? &cr->dd->nc : NULL,
 +                      DOMAINDECOMP(cr) ? domdec_zones(cr->dd) : NULL,
 +                      gmx_omp_nthreads_get(emntNonbonded));
 +
 +    for (i = 0; i < nbv->ngrp; i++)
 +    {
 +        if (nbv->grp[0].kernel_type == nbnxnk8x8x8_CUDA) /* NOTE(review): keyed on group 0 for every group, also in a hybrid gpu_cpu run - confirm intended */
 +        {
 +            nb_alloc = &pmalloc;
 +            nb_free  = &pfree;
 +        }
 +        else
 +        {
 +            nb_alloc = NULL;
 +            nb_free  = NULL;
 +        }
 +
 +        nbnxn_init_pairlist_set(&nbv->grp[i].nbl_lists,
 +                                nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
 +                                /* 8x8x8 "non-simple" lists are ATM always combined */
 +                                !nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
 +                                nb_alloc, nb_free);
 +
 +        if (i == 0 ||
 +            nbv->grp[0].kernel_type != nbv->grp[i].kernel_type) /* own atom data for group 0 and for a group whose kernel type differs from it */
 +        {
 +            snew(nbv->grp[i].nbat, 1);
 +            nbnxn_atomdata_init(fp,
 +                                nbv->grp[i].nbat,
 +                                nbv->grp[i].kernel_type,
 +                                fr->ntype, fr->nbfp,
 +                                ir->opts.ngener,
 +                                nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type) ? gmx_omp_nthreads_get(emntNonbonded) : 1,
 +                                nb_alloc, nb_free);
 +        }
 +        else
 +        {
 +            nbv->grp[i].nbat = nbv->grp[0].nbat; /* same kernel type as group 0: share its atom data */
 +        }
 +    }
 +}
 +
 +void init_forcerec(FILE              *fp,
 +                   const output_env_t oenv,
 +                   t_forcerec        *fr,
 +                   t_fcdata          *fcd,
 +                   const t_inputrec  *ir,
 +                   const gmx_mtop_t  *mtop,
 +                   const t_commrec   *cr,
 +                   matrix             box,
 +                   gmx_bool           bMolEpot,
 +                   const char        *tabfn,
 +                   const char        *tabafn,
 +                   const char        *tabpfn,
 +                   const char        *tabbfn,
 +                   const char        *nbpu_opt,
 +                   gmx_bool           bNoSolvOpt,
 +                   real               print_force)
 +{
 +    int            i, j, m, natoms, ngrp, negp_pp, negptable, egi, egj;
 +    real           rtab;
 +    char          *env;
 +    double         dbl;
 +    rvec           box_size;
 +    const t_block *cgs;
 +    gmx_bool       bGenericKernelOnly;
 +    gmx_bool       bTab, bSep14tab, bNormalnblists;
 +    t_nblists     *nbl;
 +    int           *nm_ind, egp_flags;
 +
 +    if (fr->hwinfo == NULL)
 +    {
 +        /* Detect hardware, gather information.
 +         * In mdrun, hwinfo has already been set before calling init_forcerec.
 +         * Here we ignore GPUs, as tools will not use them anyhow.
 +         */
 +        snew(fr->hwinfo, 1);
 +        gmx_detect_hardware(fp, fr->hwinfo, cr,
 +                            FALSE, FALSE, NULL);
 +    }
 +
 +    /* By default we turn acceleration on, but it might be turned off further down... */
 +    fr->use_cpu_acceleration = TRUE;
 +
 +    fr->bDomDec = DOMAINDECOMP(cr);
 +
 +    natoms = mtop->natoms;
 +
 +    if (check_box(ir->ePBC, box))
 +    {
 +        gmx_fatal(FARGS, check_box(ir->ePBC, box));
 +    }
 +
 +    /* Test particle insertion ? */
 +    if (EI_TPI(ir->eI))
 +    {
 +        /* Set to the size of the molecule to be inserted (the last one) */
 +        /* Because of old style topologies, we have to use the last cg
 +         * instead of the last molecule type.
 +         */
 +        cgs       = &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].cgs;
 +        fr->n_tpi = cgs->index[cgs->nr] - cgs->index[cgs->nr-1];
 +        if (fr->n_tpi != mtop->mols.index[mtop->mols.nr] - mtop->mols.index[mtop->mols.nr-1])
 +        {
 +            gmx_fatal(FARGS, "The molecule to insert can not consist of multiple charge groups.\nMake it a single charge group.");
 +        }
 +    }
 +    else
 +    {
 +        fr->n_tpi = 0;
 +    }
 +
 +    /* Copy AdResS parameters */
 +    if (ir->bAdress)
 +    {
 +        fr->adress_type           = ir->adress->type;
 +        fr->adress_const_wf       = ir->adress->const_wf;
 +        fr->adress_ex_width       = ir->adress->ex_width;
 +        fr->adress_hy_width       = ir->adress->hy_width;
 +        fr->adress_icor           = ir->adress->icor;
 +        fr->adress_site           = ir->adress->site;
 +        fr->adress_ex_forcecap    = ir->adress->ex_forcecap;
 +        fr->adress_do_hybridpairs = ir->adress->do_hybridpairs;
 +
 +
 +        snew(fr->adress_group_explicit, ir->adress->n_energy_grps);
 +        for (i = 0; i < ir->adress->n_energy_grps; i++)
 +        {
 +            fr->adress_group_explicit[i] = ir->adress->group_explicit[i];
 +        }
 +
 +        fr->n_adress_tf_grps = ir->adress->n_tf_grps;
 +        snew(fr->adress_tf_table_index, fr->n_adress_tf_grps);
 +        for (i = 0; i < fr->n_adress_tf_grps; i++)
 +        {
 +            fr->adress_tf_table_index[i] = ir->adress->tf_table_index[i];
 +        }
 +        copy_rvec(ir->adress->refs, fr->adress_refs);
 +    }
 +    else
 +    {
 +        fr->adress_type           = eAdressOff;
 +        fr->adress_do_hybridpairs = FALSE;
 +    }
 +
 +    /* Copy the user determined parameters */
 +    fr->userint1  = ir->userint1;
 +    fr->userint2  = ir->userint2;
 +    fr->userint3  = ir->userint3;
 +    fr->userint4  = ir->userint4;
 +    fr->userreal1 = ir->userreal1;
 +    fr->userreal2 = ir->userreal2;
 +    fr->userreal3 = ir->userreal3;
 +    fr->userreal4 = ir->userreal4;
 +
 +    /* Shell stuff */
 +    fr->fc_stepsize = ir->fc_stepsize;
 +
 +    /* Free energy */
 +    fr->efep        = ir->efep;
 +    fr->sc_alphavdw = ir->fepvals->sc_alpha;
 +    if (ir->fepvals->bScCoul)
 +    {
 +        fr->sc_alphacoul  = ir->fepvals->sc_alpha;
 +        fr->sc_sigma6_min = pow(ir->fepvals->sc_sigma_min, 6);
 +    }
 +    else
 +    {
 +        fr->sc_alphacoul  = 0;
 +        fr->sc_sigma6_min = 0; /* only needed when bScCoul is on */
 +    }
 +    fr->sc_power      = ir->fepvals->sc_power;
 +    fr->sc_r_power    = ir->fepvals->sc_r_power;
 +    fr->sc_sigma6_def = pow(ir->fepvals->sc_sigma, 6);
 +
 +    env = getenv("GMX_SCSIGMA_MIN");
 +    if (env != NULL)
 +    {
 +        dbl = 0;
 +        sscanf(env, "%lf", &dbl);
 +        fr->sc_sigma6_min = pow(dbl, 6);
 +        if (fp)
 +        {
 +            fprintf(fp, "Setting the minimum soft core sigma to %g nm\n", dbl);
 +        }
 +    }
 +
 +    fr->bNonbonded = TRUE;
 +    if (getenv("GMX_NO_NONBONDED") != NULL)
 +    {
 +        /* turn off non-bonded calculations */
 +        fr->bNonbonded = FALSE;
 +        md_print_warn(cr, fp,
 +                      "Found environment variable GMX_NO_NONBONDED.\n"
 +                      "Disabling nonbonded calculations.\n");
 +    }
 +
 +    bGenericKernelOnly = FALSE;
 +
 +    /* We now check in the NS code whether a particular combination of interactions
 +     * can be used with water optimization, and disable it if that is not the case.
 +     */
 +
 +    if (getenv("GMX_NB_GENERIC") != NULL)
 +    {
 +        if (fp != NULL)
 +        {
 +            fprintf(fp,
 +                    "Found environment variable GMX_NB_GENERIC.\n"
 +                    "Disabling all interaction-specific nonbonded kernels, will only\n"
 +                    "use the slow generic ones in src/gmxlib/nonbonded/nb_generic.c\n\n");
 +        }
 +        bGenericKernelOnly = TRUE;
 +    }
 +
 +    if (bGenericKernelOnly == TRUE)
 +    {
 +        bNoSolvOpt         = TRUE;
 +    }
 +
 +    if ( (getenv("GMX_DISABLE_CPU_ACCELERATION") != NULL) || (getenv("GMX_NOOPTIMIZEDKERNELS") != NULL) )
 +    {
 +        fr->use_cpu_acceleration = FALSE;
 +        if (fp != NULL)
 +        {
 +            fprintf(fp,
 +                    "\nFound environment variable GMX_DISABLE_CPU_ACCELERATION.\n"
 +                    "Disabling all CPU architecture-specific (e.g. SSE2/SSE4/AVX) routines.\n\n");
 +        }
 +    }
 +
 +    fr->bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
 +
 +    /* Check if we can/should do all-vs-all kernels */
 +    fr->bAllvsAll       = can_use_allvsall(ir, mtop, FALSE, NULL, NULL);
 +    fr->AllvsAll_work   = NULL;
 +    fr->AllvsAll_workgb = NULL;
 +
 +
 +    /* Neighbour searching stuff */
 +    fr->cutoff_scheme = ir->cutoff_scheme;
 +    fr->bGrid         = (ir->ns_type == ensGRID);
 +    fr->ePBC          = ir->ePBC;
 +
 +    /* Determine if we will do PBC for distances in bonded interactions */
 +    if (fr->ePBC == epbcNONE)
 +    {
 +        fr->bMolPBC = FALSE;
 +    }
 +    else
 +    {
 +        if (!DOMAINDECOMP(cr))
 +        {
 +            /* The group cut-off scheme and SHAKE assume charge groups
 +             * are whole, but not using molpbc is faster in most cases.
 +             */
 +            if (fr->cutoff_scheme == ecutsGROUP ||
 +                (ir->eConstrAlg == econtSHAKE &&
 +                 (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0 ||
 +                  gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0)))
 +            {
 +                fr->bMolPBC = ir->bPeriodicMols;
 +            }
 +            else
 +            {
 +                fr->bMolPBC = TRUE;
 +                if (getenv("GMX_USE_GRAPH") != NULL)
 +                {
 +                    fr->bMolPBC = FALSE;
 +                    if (fp)
 +                    {
 +                        fprintf(fp, "\nGMX_MOLPBC is set, using the graph for bonded interactions\n\n");
 +                    }
 +                }
 +            }
 +        }
 +        else
 +        {
 +            fr->bMolPBC = dd_bonded_molpbc(cr->dd, fr->ePBC);
 +        }
 +    }
 +    fr->bGB = (ir->implicit_solvent == eisGBSA);
 +
 +    fr->rc_scaling = ir->refcoord_scaling;
 +    copy_rvec(ir->posres_com, fr->posres_com);
 +    copy_rvec(ir->posres_comB, fr->posres_comB);
 +    fr->rlist      = cutoff_inf(ir->rlist);
 +    fr->rlistlong  = cutoff_inf(ir->rlistlong);
 +    fr->eeltype    = ir->coulombtype;
 +    fr->vdwtype    = ir->vdwtype;
 +
 +    fr->coulomb_modifier = ir->coulomb_modifier;
 +    fr->vdw_modifier     = ir->vdw_modifier;
 +
 +    /* Electrostatics: Translate from interaction-setting-in-mdp-file to kernel interaction format */
 +    switch (fr->eeltype)
 +    {
 +        case eelCUT:
 +            fr->nbkernel_elec_interaction = (fr->bGB) ? GMX_NBKERNEL_ELEC_GENERALIZEDBORN : GMX_NBKERNEL_ELEC_COULOMB;
 +            break;
 +
 +        case eelRF:
 +        case eelGRF:
 +        case eelRF_NEC:
 +            fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
 +            break;
 +
 +        case eelRF_ZERO:
 +            fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
 +            fr->coulomb_modifier          = eintmodEXACTCUTOFF;
 +            break;
 +
 +        case eelSWITCH:
 +        case eelSHIFT:
 +        case eelUSER:
 +        case eelENCADSHIFT:
 +        case eelPMESWITCH:
 +        case eelPMEUSER:
 +        case eelPMEUSERSWITCH:
 +            fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
 +            break;
 +
 +        case eelPME:
 +        case eelEWALD:
 +            fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_EWALD;
 +            break;
 +
 +        default:
 +            gmx_fatal(FARGS, "Unsupported electrostatic interaction: %s", eel_names[fr->eeltype]);
 +            break;
 +    }
 +
 +    /* Vdw: Translate from mdp settings to kernel format */
 +    switch (fr->vdwtype)
 +    {
 +        case evdwCUT:
 +            if (fr->bBHAM)
 +            {
 +                fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_BUCKINGHAM;
 +            }
 +            else
 +            {
 +                fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LENNARDJONES;
 +            }
 +            break;
 +
 +        case evdwSWITCH:
 +        case evdwSHIFT:
 +        case evdwUSER:
 +        case evdwENCADSHIFT:
 +            fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
 +            break;
 +
 +        default:
 +            gmx_fatal(FARGS, "Unsupported vdw interaction: %s", evdw_names[fr->vdwtype]);
 +            break;
 +    }
 +
 +    /* These start out identical to ir, but might be altered if we e.g. tabulate the interaction in the kernel */
 +    fr->nbkernel_elec_modifier    = fr->coulomb_modifier;
 +    fr->nbkernel_vdw_modifier     = fr->vdw_modifier;
 +
 +    fr->bTwinRange = fr->rlistlong > fr->rlist;
 +    fr->bEwald     = (EEL_PME(fr->eeltype) || fr->eeltype == eelEWALD);
 +
 +    fr->reppow     = mtop->ffparams.reppow;
 +
 +    if (ir->cutoff_scheme == ecutsGROUP)
 +    {
 +        fr->bvdwtab    = (fr->vdwtype != evdwCUT ||
 +                          !gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS));
 +        /* We have special kernels for standard Ewald and PME, but the pme-switch ones are tabulated above */
 +        fr->bcoultab   = !(fr->eeltype == eelCUT ||
 +                           fr->eeltype == eelEWALD ||
 +                           fr->eeltype == eelPME ||
 +                           fr->eeltype == eelRF ||
 +                           fr->eeltype == eelRF_ZERO);
 +
 +        /* If the user absolutely wants different switch/shift settings for coul/vdw, it is likely
 +         * going to be faster to tabulate the interaction than calling the generic kernel.
 +         */
 +        if (fr->nbkernel_elec_modifier == eintmodPOTSWITCH && fr->nbkernel_vdw_modifier == eintmodPOTSWITCH)
 +        {
 +            if ((fr->rcoulomb_switch != fr->rvdw_switch) || (fr->rcoulomb != fr->rvdw))
 +            {
 +                fr->bcoultab = TRUE;
 +            }
 +        }
 +        else if ((fr->nbkernel_elec_modifier == eintmodPOTSHIFT && fr->nbkernel_vdw_modifier == eintmodPOTSHIFT) ||
 +                 ((fr->nbkernel_elec_interaction == GMX_NBKERNEL_ELEC_REACTIONFIELD &&
 +                   fr->nbkernel_elec_modifier == eintmodEXACTCUTOFF &&
 +                   (fr->nbkernel_vdw_modifier == eintmodPOTSWITCH || fr->nbkernel_vdw_modifier == eintmodPOTSHIFT))))
 +        {
 +            if (fr->rcoulomb != fr->rvdw)
 +            {
 +                fr->bcoultab = TRUE;
 +            }
 +        }
 +
 +        if (getenv("GMX_REQUIRE_TABLES"))
 +        {
 +            fr->bvdwtab  = TRUE;
 +            fr->bcoultab = TRUE;
 +        }
 +
 +        if (fp)
 +        {
 +            fprintf(fp, "Table routines are used for coulomb: %s\n", bool_names[fr->bcoultab]);
 +            fprintf(fp, "Table routines are used for vdw:     %s\n", bool_names[fr->bvdwtab ]);
 +        }
 +
 +        if (fr->bvdwtab == TRUE)
 +        {
 +            fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
 +            fr->nbkernel_vdw_modifier    = eintmodNONE;
 +        }
 +        if (fr->bcoultab == TRUE)
 +        {
 +            fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
 +            fr->nbkernel_elec_modifier    = eintmodNONE;
 +        }
 +    }
 +
 +    if (ir->cutoff_scheme == ecutsVERLET)
 +    {
 +        if (!gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS))
 +        {
 +            gmx_fatal(FARGS, "Cut-off scheme %S only supports LJ repulsion power 12", ecutscheme_names[ir->cutoff_scheme]);
 +        }
 +        fr->bvdwtab  = FALSE;
 +        fr->bcoultab = FALSE;
 +    }
 +
 +    /* Tables are used for direct ewald sum */
 +    if (fr->bEwald)
 +    {
 +        if (EEL_PME(ir->coulombtype))
 +        {
 +            if (fp)
 +            {
 +                fprintf(fp, "Will do PME sum in reciprocal space.\n");
 +            }
 +            if (ir->coulombtype == eelP3M_AD)
 +            {
 +                please_cite(fp, "Hockney1988");
 +                please_cite(fp, "Ballenegger2012");
 +            }
 +            else
 +            {
 +                please_cite(fp, "Essmann95a");
 +            }
 +
 +            if (ir->ewald_geometry == eewg3DC)
 +            {
 +                if (fp)
 +                {
 +                    fprintf(fp, "Using the Ewald3DC correction for systems with a slab geometry.\n");
 +                }
 +                please_cite(fp, "In-Chul99a");
 +            }
 +        }
 +        fr->ewaldcoeff = calc_ewaldcoeff(ir->rcoulomb, ir->ewald_rtol);
 +        init_ewald_tab(&(fr->ewald_table), cr, ir, fp);
 +        if (fp)
 +        {
 +            fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for Ewald\n",
 +                    1/fr->ewaldcoeff);
 +        }
 +    }
 +
 +    /* Electrostatics */
 +    fr->epsilon_r       = ir->epsilon_r;
 +    fr->epsilon_rf      = ir->epsilon_rf;
 +    fr->fudgeQQ         = mtop->ffparams.fudgeQQ;
 +    fr->rcoulomb_switch = ir->rcoulomb_switch;
 +    fr->rcoulomb        = cutoff_inf(ir->rcoulomb);
 +
 +    /* Parameters for generalized RF */
 +    fr->zsquare = 0.0;
 +    fr->temp    = 0.0;
 +
 +    if (fr->eeltype == eelGRF)
 +    {
 +        init_generalized_rf(fp, mtop, ir, fr);
 +    }
 +    else if (fr->eeltype == eelSHIFT)
 +    {
 +        for (m = 0; (m < DIM); m++)
 +        {
 +            box_size[m] = box[m][m];
 +        }
 +
 +        if ((fr->eeltype == eelSHIFT && fr->rcoulomb > fr->rcoulomb_switch))
 +        {
 +            set_shift_consts(fp, fr->rcoulomb_switch, fr->rcoulomb, box_size, fr);
 +        }
 +    }
 +
 +    fr->bF_NoVirSum = (EEL_FULL(fr->eeltype) ||
 +                       gmx_mtop_ftype_count(mtop, F_POSRES) > 0 ||
 +                       gmx_mtop_ftype_count(mtop, F_FBPOSRES) > 0 ||
 +                       IR_ELEC_FIELD(*ir) ||
 +                       (fr->adress_icor != eAdressICOff)
 +                       );
 +
 +    if (fr->cutoff_scheme == ecutsGROUP &&
 +        ncg_mtop(mtop) > fr->cg_nalloc && !DOMAINDECOMP(cr))
 +    {
 +        /* Count the total number of charge groups */
 +        fr->cg_nalloc = ncg_mtop(mtop);
 +        srenew(fr->cg_cm, fr->cg_nalloc);
 +    }
 +    if (fr->shift_vec == NULL)
 +    {
 +        snew(fr->shift_vec, SHIFTS);
 +    }
 +
 +    if (fr->fshift == NULL)
 +    {
 +        snew(fr->fshift, SHIFTS);
 +    }
 +
 +    if (fr->nbfp == NULL)
 +    {
 +        fr->ntype = mtop->ffparams.atnr;
 +        fr->nbfp  = mk_nbfp(&mtop->ffparams, fr->bBHAM);
 +    }
 +
 +    /* Copy the energy group exclusions */
 +    fr->egp_flags = ir->opts.egp_flags;
 +
 +    /* Van der Waals stuff */
 +    fr->rvdw        = cutoff_inf(ir->rvdw);
 +    fr->rvdw_switch = ir->rvdw_switch;
 +    if ((fr->vdwtype != evdwCUT) && (fr->vdwtype != evdwUSER) && !fr->bBHAM)
 +    {
 +        if (fr->rvdw_switch >= fr->rvdw)
 +        {
 +            gmx_fatal(FARGS, "rvdw_switch (%f) must be < rvdw (%f)",
 +                      fr->rvdw_switch, fr->rvdw);
 +        }
 +        if (fp)
 +        {
 +            fprintf(fp, "Using %s Lennard-Jones, switch between %g and %g nm\n",
 +                    (fr->eeltype == eelSWITCH) ? "switched" : "shifted",
 +                    fr->rvdw_switch, fr->rvdw);
 +        }
 +    }
 +
 +    if (fr->bBHAM && (fr->vdwtype == evdwSHIFT || fr->vdwtype == evdwSWITCH))
 +    {
 +        gmx_fatal(FARGS, "Switch/shift interaction not supported with Buckingham");
 +    }
 +
 +    if (fp)
 +    {
 +        fprintf(fp, "Cut-off's:   NS: %g   Coulomb: %g   %s: %g\n",
 +                fr->rlist, fr->rcoulomb, fr->bBHAM ? "BHAM" : "LJ", fr->rvdw);
 +    }
 +
 +    fr->eDispCorr = ir->eDispCorr;
 +    if (ir->eDispCorr != edispcNO)
 +    {
 +        set_avcsixtwelve(fp, fr, mtop);
 +    }
 +
 +    if (fr->bBHAM)
 +    {
 +        set_bham_b_max(fp, fr, mtop);
 +    }
 +
 +    fr->gb_epsilon_solvent = ir->gb_epsilon_solvent;
 +
 +    /* Copy the GBSA data (radius, volume and surftens for each
 +     * atomtype) from the topology atomtype section to forcerec.
 +     */
 +    snew(fr->atype_radius, fr->ntype);
 +    snew(fr->atype_vol, fr->ntype);
 +    snew(fr->atype_surftens, fr->ntype);
 +    snew(fr->atype_gb_radius, fr->ntype);
 +    snew(fr->atype_S_hct, fr->ntype);
 +
 +    if (mtop->atomtypes.nr > 0)
 +    {
 +        for (i = 0; i < fr->ntype; i++)
 +        {
 +            fr->atype_radius[i] = mtop->atomtypes.radius[i];
 +        }
 +        for (i = 0; i < fr->ntype; i++)
 +        {
 +            fr->atype_vol[i] = mtop->atomtypes.vol[i];
 +        }
 +        for (i = 0; i < fr->ntype; i++)
 +        {
 +            fr->atype_surftens[i] = mtop->atomtypes.surftens[i];
 +        }
 +        for (i = 0; i < fr->ntype; i++)
 +        {
 +            fr->atype_gb_radius[i] = mtop->atomtypes.gb_radius[i];
 +        }
 +        for (i = 0; i < fr->ntype; i++)
 +        {
 +            fr->atype_S_hct[i] = mtop->atomtypes.S_hct[i];
 +        }
 +    }
 +
 +    /* Generate the GB table if needed */
 +    if (fr->bGB)
 +    {
 +#ifdef GMX_DOUBLE
 +        fr->gbtabscale = 2000;
 +#else
 +        fr->gbtabscale = 500;
 +#endif
 +
 +        fr->gbtabr = 100;
 +        fr->gbtab  = make_gb_table(fp, oenv, fr, tabpfn, fr->gbtabscale);
 +
 +        init_gb(&fr->born, cr, fr, ir, mtop, ir->rgbradii, ir->gb_algorithm);
 +
 +        /* Copy local gb data (for dd, this is done in dd_partition_system) */
 +        if (!DOMAINDECOMP(cr))
 +        {
 +            make_local_gb(cr, fr->born, ir->gb_algorithm);
 +        }
 +    }
 +
 +    /* Set the charge scaling */
 +    if (fr->epsilon_r != 0)
 +    {
 +        fr->epsfac = ONE_4PI_EPS0/fr->epsilon_r;
 +    }
 +    else
 +    {
 +        /* eps = 0 is infinite dieletric: no coulomb interactions */
 +        fr->epsfac = 0;
 +    }
 +
 +    /* Reaction field constants */
 +    if (EEL_RF(fr->eeltype))
 +    {
 +        calc_rffac(fp, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
 +                   fr->rcoulomb, fr->temp, fr->zsquare, box,
 +                   &fr->kappa, &fr->k_rf, &fr->c_rf);
 +    }
 +
 +    set_chargesum(fp, fr, mtop);
 +
 +    /* if we are using LR electrostatics, and they are tabulated,
 +     * the tables will contain modified coulomb interactions.
 +     * Since we want to use the non-shifted ones for 1-4
 +     * coulombic interactions, we must have an extra set of tables.
 +     */
 +
 +    /* Construct tables.
 +     * A little unnecessary to make both vdw and coul tables sometimes,
 +     * but what the heck... */
 +
 +    bTab = fr->bcoultab || fr->bvdwtab || fr->bEwald;
 +
 +    bSep14tab = ((!bTab || fr->eeltype != eelCUT || fr->vdwtype != evdwCUT ||
 +                  fr->bBHAM || fr->bEwald) &&
 +                 (gmx_mtop_ftype_count(mtop, F_LJ14) > 0 ||
 +                  gmx_mtop_ftype_count(mtop, F_LJC14_Q) > 0 ||
 +                  gmx_mtop_ftype_count(mtop, F_LJC_PAIRS_NB) > 0));
 +
 +    negp_pp   = ir->opts.ngener - ir->nwall;
 +    negptable = 0;
 +    if (!bTab)
 +    {
 +        bNormalnblists = TRUE;
 +        fr->nnblists   = 1;
 +    }
 +    else
 +    {
 +        bNormalnblists = (ir->eDispCorr != edispcNO);
 +        for (egi = 0; egi < negp_pp; egi++)
 +        {
 +            for (egj = egi; egj < negp_pp; egj++)
 +            {
 +                egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
 +                if (!(egp_flags & EGP_EXCL))
 +                {
 +                    if (egp_flags & EGP_TABLE)
 +                    {
 +                        negptable++;
 +                    }
 +                    else
 +                    {
 +                        bNormalnblists = TRUE;
 +                    }
 +                }
 +            }
 +        }
 +        if (bNormalnblists)
 +        {
 +            fr->nnblists = negptable + 1;
 +        }
 +        else
 +        {
 +            fr->nnblists = negptable;
 +        }
 +        if (fr->nnblists > 1)
 +        {
 +            snew(fr->gid2nblists, ir->opts.ngener*ir->opts.ngener);
 +        }
 +    }
 +
 +    if (ir->adress)
 +    {
 +        fr->nnblists *= 2;
 +    }
 +
 +    snew(fr->nblists, fr->nnblists);
 +
 +    /* This code automatically gives table length tabext without cut-off's,
 +     * in that case grompp should already have checked that we do not need
 +     * normal tables and we only generate tables for 1-4 interactions.
 +     */
 +    rtab = ir->rlistlong + ir->tabext;
 +
 +    if (bTab)
 +    {
 +        /* make tables for ordinary interactions */
 +        if (bNormalnblists)
 +        {
 +            make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[0]);
 +            if (ir->adress)
 +            {
 +                make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[fr->nnblists/2]);
 +            }
 +            if (!bSep14tab)
 +            {
 +                fr->tab14 = fr->nblists[0].table_elec_vdw;
 +            }
 +            m = 1;
 +        }
 +        else
 +        {
 +            m = 0;
 +        }
 +        if (negptable > 0)
 +        {
 +            /* Read the special tables for certain energy group pairs */
 +            nm_ind = mtop->groups.grps[egcENER].nm_ind;
 +            for (egi = 0; egi < negp_pp; egi++)
 +            {
 +                for (egj = egi; egj < negp_pp; egj++)
 +                {
 +                    egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
 +                    if ((egp_flags & EGP_TABLE) && !(egp_flags & EGP_EXCL))
 +                    {
 +                        nbl = &(fr->nblists[m]);
 +                        if (fr->nnblists > 1)
 +                        {
 +                            fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = m;
 +                        }
 +                        /* Read the table file with the two energy groups names appended */
 +                        make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
 +                                        *mtop->groups.grpname[nm_ind[egi]],
 +                                        *mtop->groups.grpname[nm_ind[egj]],
 +                                        &fr->nblists[m]);
 +                        if (ir->adress)
 +                        {
 +                            make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
 +                                            *mtop->groups.grpname[nm_ind[egi]],
 +                                            *mtop->groups.grpname[nm_ind[egj]],
 +                                            &fr->nblists[fr->nnblists/2+m]);
 +                        }
 +                        m++;
 +                    }
 +                    else if (fr->nnblists > 1)
 +                    {
 +                        fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = 0;
 +                    }
 +                }
 +            }
 +        }
 +    }
 +    if (bSep14tab)
 +    {
 +        /* generate extra tables with plain Coulomb for 1-4 interactions only */
 +        fr->tab14 = make_tables(fp, oenv, fr, MASTER(cr), tabpfn, rtab,
 +                                GMX_MAKETABLES_14ONLY);
 +    }
 +
 +    /* Read AdResS Thermo Force table if needed */
 +    if (fr->adress_icor == eAdressICThermoForce)
 +    {
 +        /* old todo replace */
 +
 +        if (ir->adress->n_tf_grps > 0)
 +        {
 +            make_adress_tf_tables(fp, oenv, fr, ir, tabfn, mtop, box);
 +
 +        }
 +        else
 +        {
 +            /* load the default table */
 +            snew(fr->atf_tabs, 1);
 +            fr->atf_tabs[DEFAULT_TF_TABLE] = make_atf_table(fp, oenv, fr, tabafn, box);
 +        }
 +    }
 +
 +    /* Wall stuff */
 +    fr->nwall = ir->nwall;
 +    if (ir->nwall && ir->wall_type == ewtTABLE)
 +    {
 +        make_wall_tables(fp, oenv, ir, tabfn, &mtop->groups, fr);
 +    }
 +
 +    if (fcd && tabbfn)
 +    {
 +        fcd->bondtab  = make_bonded_tables(fp,
 +                                           F_TABBONDS, F_TABBONDSNC,
 +                                           mtop, tabbfn, "b");
 +        fcd->angletab = make_bonded_tables(fp,
 +                                           F_TABANGLES, -1,
 +                                           mtop, tabbfn, "a");
 +        fcd->dihtab   = make_bonded_tables(fp,
 +                                           F_TABDIHS, -1,
 +                                           mtop, tabbfn, "d");
 +    }
 +    else
 +    {
 +        if (debug)
 +        {
 +            fprintf(debug, "No fcdata or table file name passed, can not read table, can not do bonded interactions\n");
 +        }
 +    }
 +
 +    /* QM/MM initialization if requested
 +     */
 +    if (ir->bQMMM)
 +    {
 +        fprintf(stderr, "QM/MM calculation requested.\n");
 +    }
 +
 +    fr->bQMMM      = ir->bQMMM;
 +    fr->qr         = mk_QMMMrec();
 +
 +    /* Set all the static charge group info */
 +    fr->cginfo_mb = init_cginfo_mb(fp, mtop, fr, bNoSolvOpt,
 +                                   &fr->bExcl_IntraCGAll_InterCGNone);
 +    if (DOMAINDECOMP(cr))
 +    {
 +        fr->cginfo = NULL;
 +    }
 +    else
 +    {
 +        fr->cginfo = cginfo_expand(mtop->nmolblock, fr->cginfo_mb);
 +    }
 +
 +    if (!DOMAINDECOMP(cr))
 +    {
 +        /* When using particle decomposition, the effect of the second argument,
 +         * which sets fr->hcg, is corrected later in do_md and init_em.
 +         */
 +        forcerec_set_ranges(fr, ncg_mtop(mtop), ncg_mtop(mtop),
 +                            mtop->natoms, mtop->natoms, mtop->natoms);
 +    }
 +
 +    fr->print_force = print_force;
 +
 +
 +    /* coarse load balancing vars */
 +    fr->t_fnbf    = 0.;
 +    fr->t_wait    = 0.;
 +    fr->timesteps = 0;
 +
 +    /* Initialize neighbor search */
 +    init_ns(fp, cr, &fr->ns, fr, mtop, box);
 +
 +    if (cr->duty & DUTY_PP)
 +    {
 +        gmx_nonbonded_setup(fp, fr, bGenericKernelOnly);
 +        /*
 +           if (ir->bAdress)
 +            {
 +                gmx_setup_adress_kernels(fp,bGenericKernelOnly);
 +            }
 +         */
 +    }
 +
 +    /* Initialize the thread working data for bonded interactions */
 +    init_forcerec_f_threads(fr, mtop->groups.grps[egcENER].nr);
 +
 +    snew(fr->excl_load, fr->nthreads+1);
 +
 +    if (fr->cutoff_scheme == ecutsVERLET)
 +    {
 +        if (ir->rcoulomb != ir->rvdw)
 +        {
 +            gmx_fatal(FARGS, "With Verlet lists rcoulomb and rvdw should be identical");
 +        }
 +
 +        init_nb_verlet(fp, &fr->nbv, ir, fr, cr, nbpu_opt);
 +    }
 +
 +    /* fr->ic is used both by verlet and group kernels (to some extent) now */
 +    init_interaction_const(fp, &fr->ic, fr, rtab);
 +    if (ir->eDispCorr != edispcNO)
 +    {
 +        calc_enervirdiff(fp, ir->eDispCorr, fr);
 +    }
 +}
 +/* Print helpers: each one writes "<argument text>: <value>" followed by a
 + * newline to fp, where <argument text> is the stringified second macro
 + * argument. pr_real formats the value with %e, pr_int with %d, and pr_bool
 + * prints the string bool_names[b].
 + */
 +#define pr_real(fp, r) fprintf(fp, "%s: %e\n",#r, r)
 +#define pr_int(fp, i)  fprintf((fp), "%s: %d\n",#i, i)
 +#define pr_bool(fp, b) fprintf((fp), "%s: %s\n",#b, bool_names[b])
 +/* Writes a selection of fields of fr to fp, one "name: value" line per
 + * field, plus the size n of table_elec_vdw for each of the fr->nnblists
 + * neighbor lists, and then flushes fp.
 + * The cr argument is not used in this function.
 + */
 +void pr_forcerec(FILE *fp, t_forcerec *fr, t_commrec *cr)
 +{
 +    int i;
 +
 +    pr_real(fp, fr->rlist);
 +    pr_real(fp, fr->rcoulomb);
 +    pr_real(fp, fr->fudgeQQ);
 +    pr_bool(fp, fr->bGrid);
 +    pr_bool(fp, fr->bTwinRange);
 +    /*pr_int(fp,fr->cg0);
 +       pr_int(fp,fr->hcg);*/
 +    for (i = 0; i < fr->nnblists; i++)
 +    {
 +        pr_int(fp, fr->nblists[i].table_elec_vdw.n);
 +    }
 +    pr_real(fp, fr->rcoulomb_switch);
 +    pr_real(fp, fr->rcoulomb); /* NOTE(review): rcoulomb was already printed above */
 +
 +    fflush(fp);
 +}
 +/* Fills fr->excl_load.
 + * With particle decomposition, pd_at_range() sets entries 0 and 1 and the
 + * function returns. Otherwise entry 0 is set to 0 and entry t
 + * (1 <= t <= fr->nthreads) is set to the number of leading rows of
 + * top->excls that had to be consumed for the running count of exclusion
 + * entries with a[j] > i to reach ntot*t/fr->nthreads (integer division),
 + * where ntot is the total count of such entries.
 + * NOTE(review): presumably excl_load[t-1]..excl_load[t] is the row range
 + * assigned to one thread -- confirm against the callers.
 + */
 +void forcerec_set_excl_load(t_forcerec *fr,
 +                            const gmx_localtop_t *top, const t_commrec *cr)
 +{
 +    const int *ind, *a;
 +    int        t, i, j, ntot, n, ntarget;
 +
 +    if (cr != NULL && PARTDECOMP(cr))
 +    {
 +        /* No OpenMP with particle decomposition */
 +        pd_at_range(cr,
 +                    &fr->excl_load[0],
 +                    &fr->excl_load[1]);
 +
 +        return;
 +    }
 +
 +    ind = top->excls.index;
 +    a   = top->excls.a;
 +    /* First pass: count the entries whose value exceeds their row index */
 +    ntot = 0;
 +    for (i = 0; i < top->excls.nr; i++)
 +    {
 +        for (j = ind[i]; j < ind[i+1]; j++)
 +        {
 +            if (a[j] > i)
 +            {
 +                ntot++;
 +            }
 +        }
 +    }
 +    /* Second pass: n and i carry over between threads, so rows are consumed once */
 +    fr->excl_load[0] = 0;
 +    n                = 0;
 +    i                = 0;
 +    for (t = 1; t <= fr->nthreads; t++)
 +    {
 +        ntarget = (ntot*t)/fr->nthreads; /* cumulative count to reach for boundary t */
 +        while (i < top->excls.nr && n < ntarget)
 +        {
 +            for (j = ind[i]; j < ind[i+1]; j++)
 +            {
 +                if (a[j] > i)
 +                {
 +                    n++;
 +                }
 +            }
 +            i++;
 +        }
 +        fr->excl_load[t] = i;
 +    }
 +}
index b17234f92b02806b86dcc4357cfd339af96bfe8f,0000000000000000000000000000000000000000..da84f681dd110ab00462ca52f6e36e848ae713a6
mode 100644,000000..100644
--- /dev/null
@@@ -1,1341 -1,0 +1,1348 @@@
-             gmx_incons("Unsupported stride");
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + *          GROningen MAchine for Chemical Simulations
 + *
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2012, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + *
 + * For more info, check our website at http://www.gromacs.org
 + */
 +
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <math.h>
 +#include <string.h>
 +#include "smalloc.h"
 +#include "macros.h"
 +#include "vec.h"
 +#include "nbnxn_consts.h"
 +#include "nbnxn_internal.h"
 +#include "nbnxn_search.h"
 +#include "nbnxn_atomdata.h"
 +#include "gmx_omp_nthreads.h"
 +
 +/* Default nbnxn allocation routine, allocates NBNXN_MEM_ALIGN byte aligned.
 + * Stores in *ptr the pointer returned by save_malloc_aligned() for a
 + * request of nbytes. Memory obtained here is released with
 + * nbnxn_free_aligned().
 + */
 +void nbnxn_alloc_aligned(void **ptr, size_t nbytes)
 +{
 +    *ptr = save_malloc_aligned("ptr", __FILE__, __LINE__, nbytes, 1, NBNXN_MEM_ALIGN);
 +}
 +
 +/* Free function for memory allocated with nbnxn_alloc_aligned.
 + * Passes ptr straight to sfree_aligned().
 + */
 +void nbnxn_free_aligned(void *ptr)
 +{
 +    sfree_aligned(ptr);
 +}
 +
 +/* Reallocation wrapper function for nbnxn data structures.
 + * Allocates a new buffer of nbytes_new bytes with ma, copies the first
 + * nbytes_copy bytes of the old buffer *ptr into it, frees the old buffer
 + * with mf when it is not NULL, and stores the new pointer in *ptr.
 + * Calls gmx_fatal when ma returns NULL for nbytes_new > 0, and gmx_incons
 + * when nbytes_copy > 0 and nbytes_new < nbytes_copy.
 + */
 +void nbnxn_realloc_void(void **ptr,
 +                        int nbytes_copy, int nbytes_new,
 +                        nbnxn_alloc_t *ma,
 +                        nbnxn_free_t  *mf)
 +{
 +    void *ptr_new;
 +
 +    ma(&ptr_new, nbytes_new);
 +
 +    if (nbytes_new > 0 && ptr_new == NULL)
 +    {
 +        gmx_fatal(FARGS, "Allocation of %d bytes failed", nbytes_new);
 +    }
 +
 +    if (nbytes_copy > 0)
 +    {
 +        if (nbytes_new < nbytes_copy)
 +        {
 +            gmx_incons("In nbnxn_realloc_void: new size less than copy size");
 +        }
 +        memcpy(ptr_new, *ptr, nbytes_copy); /* *ptr is read here without a NULL check */
 +    }
 +    if (*ptr != NULL)
 +    {
 +        mf(*ptr);
 +    }
 +    *ptr = ptr_new;
 +}
 +
 +/* Reallocate the nbnxn_atomdata_t for a size of n atoms.
 + * Each array is replaced through nbnxn_realloc_void() using nbat->alloc and
 + * nbat->free; the amount of old data copied over is computed from
 + * nbat->natoms. The q array is only reallocated when nbat->XFormat is not
 + * nbatXYZQ, and energrp only when nbat->nenergrp > 1. On return
 + * nbat->nalloc is set to n; nbat->natoms is not modified.
 + */
 +void nbnxn_atomdata_realloc(nbnxn_atomdata_t *nbat, int n)
 +{
 +    int t;
 +
 +    nbnxn_realloc_void((void **)&nbat->type,
 +                       nbat->natoms*sizeof(*nbat->type),
 +                       n*sizeof(*nbat->type),
 +                       nbat->alloc, nbat->free);
 +    nbnxn_realloc_void((void **)&nbat->lj_comb,
 +                       nbat->natoms*2*sizeof(*nbat->lj_comb),
 +                       n*2*sizeof(*nbat->lj_comb),
 +                       nbat->alloc, nbat->free);
 +    if (nbat->XFormat != nbatXYZQ)
 +    {
 +        nbnxn_realloc_void((void **)&nbat->q,
 +                           nbat->natoms*sizeof(*nbat->q),
 +                           n*sizeof(*nbat->q),
 +                           nbat->alloc, nbat->free);
 +    }
 +    if (nbat->nenergrp > 1)
 +    {
 +        /* One energrp entry per group of nbat->na_c atoms (integer division) */
 +        nbnxn_realloc_void((void **)&nbat->energrp,
 +                           nbat->natoms/nbat->na_c*sizeof(*nbat->energrp),
 +                           n/nbat->na_c*sizeof(*nbat->energrp),
 +                           nbat->alloc, nbat->free);
 +    }
 +    nbnxn_realloc_void((void **)&nbat->x,
 +                       nbat->natoms*nbat->xstride*sizeof(*nbat->x),
 +                       n*nbat->xstride*sizeof(*nbat->x),
 +                       nbat->alloc, nbat->free);
 +    for (t = 0; t < nbat->nout; t++)
 +    {
 +        /* Allocate one element extra for possible signaling with CUDA.
 +         * NOTE(review): the sizes passed below are exactly n*fstride
 +         * elements; no extra element is requested here -- confirm whether
 +         * this comment is stale.
 +         */
 +        nbnxn_realloc_void((void **)&nbat->out[t].f,
 +                           nbat->natoms*nbat->fstride*sizeof(*nbat->out[t].f),
 +                           n*nbat->fstride*sizeof(*nbat->out[t].f),
 +                           nbat->alloc, nbat->free);
 +    }
 +    nbat->nalloc = n;
 +}
 +
 +/* Initializes an nbnxn_atomdata_output_t data structure.
 + * Sets out->f to NULL and uses ma to allocate fshift (SHIFTS*DIM elements)
 + * and the energy arrays Vvdw and Vc (nenergrp*nenergrp elements each).
 + * For the two SIMD kernel types tested below, VSvdw and VSc are allocated
 + * as well with out->nVS elements each; for all other kernel types out->nVS
 + * is set to 0 and VSvdw/VSc are not assigned.
 + * The allocated arrays are not initialized in this function.
 + */
 +static void nbnxn_atomdata_output_init(nbnxn_atomdata_output_t *out,
 +                                       int nb_kernel_type,
 +                                       int nenergrp, int stride,
 +                                       nbnxn_alloc_t *ma)
 +{
 +    int cj_size;
 +
 +    out->f = NULL;
 +    ma((void **)&out->fshift, SHIFTS*DIM*sizeof(*out->fshift));
 +    out->nV = nenergrp*nenergrp;
 +    ma((void **)&out->Vvdw, out->nV*sizeof(*out->Vvdw));
 +    ma((void **)&out->Vc, out->nV*sizeof(*out->Vc  ));
 +
 +    if (nb_kernel_type == nbnxnk4xN_SIMD_4xN ||
 +        nb_kernel_type == nbnxnk4xN_SIMD_2xNN)
 +    {
 +        cj_size  = nbnxn_kernel_to_cj_size(nb_kernel_type);
 +        out->nVS = nenergrp*nenergrp*stride*(cj_size>>1)*cj_size; /* cj_size>>1 is cj_size/2 */
 +        ma((void **)&out->VSvdw, out->nVS*sizeof(*out->VSvdw));
 +        ma((void **)&out->VSc, out->nVS*sizeof(*out->VSc  ));
 +    }
 +    else
 +    {
 +        out->nVS = 0;
 +    }
 +}
 +/* Gathers in[a[i]] for i = 0..na-1 into innb[0..na-1] and then sets
 + * innb[na..na_round-1] to fill. When na_round <= na no fill entries are
 + * written.
 + */
 +static void copy_int_to_nbat_int(const int *a, int na, int na_round,
 +                                 const int *in, int fill, int *innb)
 +{
 +    int i, j;
 +
 +    j = 0;
 +    for (i = 0; i < na; i++)
 +    {
 +        innb[j++] = in[a[i]];
 +    }
 +    /* Complete the partially filled last cell with fill */
 +    for (; i < na_round; i++)
 +    {
 +        innb[j++] = fill;
 +    }
 +}
 +/* Sets the XX, YY and ZZ entries of na consecutive atoms in xnb to zero,
 + * starting at atom index a0, using the memory layout selected by
 + * nbatFormat. For nbatXYZ and nbatXYZQ only the first DIM entries of each
 + * per-atom record are written. The switch has no default case, so any
 + * other format value leaves xnb untouched.
 + */
 +static void clear_nbat_real(int na, int nbatFormat, real *xnb, int a0)
 +{
 +    int a, d, j, c;
 +
 +    switch (nbatFormat)
 +    {
 +        case nbatXYZ:
 +            for (a = 0; a < na; a++)
 +            {
 +                for (d = 0; d < DIM; d++)
 +                {
 +                    xnb[(a0+a)*STRIDE_XYZ+d] = 0;
 +                }
 +            }
 +            break;
 +        case nbatXYZQ:
 +            for (a = 0; a < na; a++)
 +            {
 +                for (d = 0; d < DIM; d++)
 +                {
 +                    xnb[(a0+a)*STRIDE_XYZQ+d] = 0;
 +                }
 +            }
 +            break;
 +        case nbatX4:
 +            j = X4_IND_A(a0);
 +            c = a0 & (PACK_X4-1); /* position of a0 within its pack */
 +            for (a = 0; a < na; a++)
 +            {
 +                xnb[j+XX*PACK_X4] = 0;
 +                xnb[j+YY*PACK_X4] = 0;
 +                xnb[j+ZZ*PACK_X4] = 0;
 +                j++;
 +                c++;
 +                if (c == PACK_X4)
 +                {
 +                    j += (DIM-1)*PACK_X4; /* pack complete: jump to the next pack */
 +                    c  = 0;
 +                }
 +            }
 +            break;
 +        case nbatX8:
 +            j = X8_IND_A(a0);
 +            c = a0 & (PACK_X8-1); /* position of a0 within its pack */
 +            for (a = 0; a < na; a++)
 +            {
 +                xnb[j+XX*PACK_X8] = 0;
 +                xnb[j+YY*PACK_X8] = 0;
 +                xnb[j+ZZ*PACK_X8] = 0;
 +                j++;
 +                c++;
 +                if (c == PACK_X8)
 +                {
 +                    j += (DIM-1)*PACK_X8; /* pack complete: jump to the next pack */
 +                    c  = 0;
 +                }
 +            }
 +            break;
 +    }
 +}
 +/* Copies the coordinates x[a[i]], i = 0..na-1, into xnb in the layout
 + * selected by nbatFormat, starting at atom slot a0, and then fills the
 + * slots na..na_round-1 with filler coordinates computed from cx, cy, cz
 + * and the slot counter i (see NBAT_FAR_AWAY below).
 + * For nbatXYZQ the fourth entry of each record is skipped, not written.
 + * Calls gmx_incons for an unsupported nbatFormat.
 + * NOTE(review): cx, cy, cz are presumably grid cell indices of the cell
 + * being filled -- confirm against the callers.
 + */
 +void copy_rvec_to_nbat_real(const int *a, int na, int na_round,
 +                            rvec *x, int nbatFormat, real *xnb, int a0,
 +                            int cx, int cy, int cz)
 +{
 +    int i, j, c;
 +
 +/* We might need to place filler particles to fill up the cell to na_round.
 + * The coefficients (LJ and q) for such particles are zero.
 + * But we might still get NaN as 0*NaN when distances are too small.
 + * We hope that -107 nm is far enough away from zero
 + * to avoid accidental short distances to particles shifted down for pbc.
 + */
 +#define NBAT_FAR_AWAY 107
 +
 +    switch (nbatFormat)
 +    {
 +        case nbatXYZ:
 +            j = a0*STRIDE_XYZ;
 +            for (i = 0; i < na; i++)
 +            {
 +                xnb[j++] = x[a[i]][XX];
 +                xnb[j++] = x[a[i]][YY];
 +                xnb[j++] = x[a[i]][ZZ];
 +            }
 +            /* Complete the partially filled last cell with particles far apart.
 +             * Each filler gets a different z value because i is part of
 +             * the z expression, so fillers do not coincide with each other.
 +             */
 +            for (; i < na_round; i++)
 +            {
 +                xnb[j++] = -NBAT_FAR_AWAY*(1 + cx);
 +                xnb[j++] = -NBAT_FAR_AWAY*(1 + cy);
 +                xnb[j++] = -NBAT_FAR_AWAY*(1 + cz + i);
 +            }
 +            break;
 +        case nbatXYZQ:
 +            j = a0*STRIDE_XYZQ;
 +            for (i = 0; i < na; i++)
 +            {
 +                xnb[j++] = x[a[i]][XX];
 +                xnb[j++] = x[a[i]][YY];
 +                xnb[j++] = x[a[i]][ZZ];
 +                j++; /* skip the entry after z; it is not written here */
 +            }
 +            /* Complete the partially filled last cell with particles far apart */
 +            for (; i < na_round; i++)
 +            {
 +                xnb[j++] = -NBAT_FAR_AWAY*(1 + cx);
 +                xnb[j++] = -NBAT_FAR_AWAY*(1 + cy);
 +                xnb[j++] = -NBAT_FAR_AWAY*(1 + cz + i);
 +                j++; /* skip the entry after z; it is not written here */
 +            }
 +            break;
 +        case nbatX4:
 +            j = X4_IND_A(a0);
 +            c = a0 & (PACK_X4-1); /* position of a0 within its pack */
 +            for (i = 0; i < na; i++)
 +            {
 +                xnb[j+XX*PACK_X4] = x[a[i]][XX];
 +                xnb[j+YY*PACK_X4] = x[a[i]][YY];
 +                xnb[j+ZZ*PACK_X4] = x[a[i]][ZZ];
 +                j++;
 +                c++;
 +                if (c == PACK_X4)
 +                {
 +                    j += (DIM-1)*PACK_X4; /* pack complete: jump to the next pack */
 +                    c  = 0;
 +                }
 +            }
 +            /* Complete the partially filled last cell with particles far apart */
 +            for (; i < na_round; i++)
 +            {
 +                xnb[j+XX*PACK_X4] = -NBAT_FAR_AWAY*(1 + cx);
 +                xnb[j+YY*PACK_X4] = -NBAT_FAR_AWAY*(1 + cy);
 +                xnb[j+ZZ*PACK_X4] = -NBAT_FAR_AWAY*(1 + cz + i);
 +                j++;
 +                c++;
 +                if (c == PACK_X4)
 +                {
 +                    j += (DIM-1)*PACK_X4;
 +                    c  = 0;
 +                }
 +            }
 +            break;
 +        case nbatX8:
 +            j = X8_IND_A(a0);
 +            c = a0 & (PACK_X8 - 1); /* position of a0 within its pack */
 +            for (i = 0; i < na; i++)
 +            {
 +                xnb[j+XX*PACK_X8] = x[a[i]][XX];
 +                xnb[j+YY*PACK_X8] = x[a[i]][YY];
 +                xnb[j+ZZ*PACK_X8] = x[a[i]][ZZ];
 +                j++;
 +                c++;
 +                if (c == PACK_X8)
 +                {
 +                    j += (DIM-1)*PACK_X8; /* pack complete: jump to the next pack */
 +                    c  = 0;
 +                }
 +            }
 +            /* Complete the partially filled last cell with particles far apart */
 +            for (; i < na_round; i++)
 +            {
 +                xnb[j+XX*PACK_X8] = -NBAT_FAR_AWAY*(1 + cx);
 +                xnb[j+YY*PACK_X8] = -NBAT_FAR_AWAY*(1 + cy);
 +                xnb[j+ZZ*PACK_X8] = -NBAT_FAR_AWAY*(1 + cz + i);
 +                j++;
 +                c++;
 +                if (c == PACK_X8)
 +                {
 +                    j += (DIM-1)*PACK_X8;
 +                    c  = 0;
 +                }
 +            }
 +            break;
 +        default:
-             /* In nbfp_s4 we use a stride of 4 for storing two parameters */
++            gmx_incons("Unsupported nbnxn_atomdata_t format");
 +    }
 +}
 +
 +/* Sets the LJ parameter arrays required by the combination rule that is
 + * already stored in nbat->comb_rule:
 + *   ljcrGEOM: fills nbat->nbfp_comb with the square roots of the diagonal
 + *             entries of nbat->nbfp;
 + *   ljcrLB:   fills nbat->nbfp_comb with values derived from the diagonal
 + *             entries, or zeros when either entry is not positive;
 + *   ljcrNONE: allocates nbat->nbfp_s4 and copies the full nbfp matrix into
 + *             it with a stride of 4, zero-padding entries 2 and 3.
 + * Any other value results in a call to gmx_incons.
 + */
 +static void set_combination_rule_data(nbnxn_atomdata_t *nbat)
 +{
 +    int  nt, i, j;
 +    real c6, c12;
 +
 +    nt = nbat->ntype;
 +
 +    switch (nbat->comb_rule)
 +    {
 +        case  ljcrGEOM:
 +            nbat->comb_rule = ljcrGEOM; /* NOTE(review): no-op, the value is already ljcrGEOM here */
 +
 +            for (i = 0; i < nt; i++)
 +            {
 +                /* Store the square roots of the diagonal of the nbfp matrix */
 +                nbat->nbfp_comb[i*2  ] = sqrt(nbat->nbfp[(i*nt+i)*2  ]);
 +                nbat->nbfp_comb[i*2+1] = sqrt(nbat->nbfp[(i*nt+i)*2+1]);
 +            }
 +            break;
 +        case ljcrLB:
 +            for (i = 0; i < nt; i++)
 +            {
 +                /* Get 6*C6 and 12*C12 from the diagonal of the nbfp matrix */
 +                c6  = nbat->nbfp[(i*nt+i)*2  ];
 +                c12 = nbat->nbfp[(i*nt+i)*2+1];
 +                if (c6 > 0 && c12 > 0)
 +                {
 +                    /* We store 0.5*2^1/6*sigma and sqrt(4*3*eps),
 +                     * so we get 6*C6 and 12*C12 after combining.
 +                     */
 +                    nbat->nbfp_comb[i*2  ] = 0.5*pow(c12/c6, 1.0/6.0);
 +                    nbat->nbfp_comb[i*2+1] = sqrt(c6*c6/c12);
 +                }
 +                else
 +                {
 +                    /* Non-positive parameters: store zeros instead of dividing */
 +                    nbat->nbfp_comb[i*2  ] = 0;
 +                    nbat->nbfp_comb[i*2+1] = 0;
 +                }
 +            }
 +            break;
 +        case ljcrNONE:
++            /* nbfp_s4 stores two parameters using a stride of 4,
++             * because this would suit x86 SIMD single-precision
++             * quad-load intrinsics. There's a slight inefficiency in
++             * allocating and initializing nbfp_s4 when it might not
++             * be used, but introducing the conditional code is not
++             * really worth it. */
 +            nbat->alloc((void **)&nbat->nbfp_s4, nt*nt*4*sizeof(*nbat->nbfp_s4));
 +            for (i = 0; i < nt; i++)
 +            {
 +                for (j = 0; j < nt; j++)
 +                {
 +                    nbat->nbfp_s4[(i*nt+j)*4+0] = nbat->nbfp[(i*nt+j)*2+0];
 +                    nbat->nbfp_s4[(i*nt+j)*4+1] = nbat->nbfp[(i*nt+j)*2+1];
 +                    nbat->nbfp_s4[(i*nt+j)*4+2] = 0; /* padding */
 +                    nbat->nbfp_s4[(i*nt+j)*4+3] = 0; /* padding */
 +                }
 +            }
 +            break;
 +        default:
 +            gmx_incons("Unknown combination rule");
 +            break;
 +    }
 +}
 +
 +/* Initializes an nbnxn_atomdata_t data structure */
 +void nbnxn_atomdata_init(FILE *fp,
 +                         nbnxn_atomdata_t *nbat,
 +                         int nb_kernel_type,
 +                         int ntype, const real *nbfp,
 +                         int n_energygroups,
 +                         int nout,
 +                         nbnxn_alloc_t *alloc,
 +                         nbnxn_free_t  *free)
 +{
 +    int      i, j;
 +    real     c6, c12, tol;
 +    char    *ptr;
 +    gmx_bool simple, bCombGeom, bCombLB;
 +
 +    if (alloc == NULL)
 +    {
 +        nbat->alloc = nbnxn_alloc_aligned;
 +    }
 +    else
 +    {
 +        nbat->alloc = alloc;
 +    }
 +    if (free == NULL)
 +    {
 +        nbat->free = nbnxn_free_aligned;
 +    }
 +    else
 +    {
 +        nbat->free = free;
 +    }
 +
 +    if (debug)
 +    {
 +        fprintf(debug, "There are %d atom types in the system, adding one for nbnxn_atomdata_t\n", ntype);
 +    }
 +    nbat->ntype = ntype + 1;
 +    nbat->alloc((void **)&nbat->nbfp,
 +                nbat->ntype*nbat->ntype*2*sizeof(*nbat->nbfp));
 +    nbat->alloc((void **)&nbat->nbfp_comb, nbat->ntype*2*sizeof(*nbat->nbfp_comb));
 +
 +    /* A tolerance of 1e-5 seems reasonable for (possibly hand-typed)
 +     * force-field floating point parameters.
 +     */
 +    tol = 1e-5;
 +    ptr = getenv("GMX_LJCOMB_TOL");
 +    if (ptr != NULL)
 +    {
 +        double dbl;
 +
 +        sscanf(ptr, "%lf", &dbl);
 +        tol = dbl;
 +    }
 +    bCombGeom = TRUE;
 +    bCombLB   = TRUE;
 +
 +    /* Temporarily fill nbat->nbfp_comb with sigma and epsilon
 +     * to check for the LB rule.
 +     */
 +    for (i = 0; i < ntype; i++)
 +    {
 +        c6  = nbfp[(i*ntype+i)*2  ]/6.0;
 +        c12 = nbfp[(i*ntype+i)*2+1]/12.0;
 +        if (c6 > 0 && c12 > 0)
 +        {
 +            nbat->nbfp_comb[i*2  ] = pow(c12/c6, 1.0/6.0);
 +            nbat->nbfp_comb[i*2+1] = 0.25*c6*c6/c12;
 +        }
 +        else if (c6 == 0 && c12 == 0)
 +        {
 +            nbat->nbfp_comb[i*2  ] = 0;
 +            nbat->nbfp_comb[i*2+1] = 0;
 +        }
 +        else
 +        {
 +            /* Can not use LB rule with only dispersion or repulsion */
 +            bCombLB = FALSE;
 +        }
 +    }
 +
 +    for (i = 0; i < nbat->ntype; i++)
 +    {
 +        for (j = 0; j < nbat->ntype; j++)
 +        {
 +            if (i < ntype && j < ntype)
 +            {
 +                /* fr->nbfp has been updated, so that array too now stores c6/c12 including
 +                 * the 6.0/12.0 prefactors to save 2 flops in the most common case (force-only).
 +                 */
 +                c6  = nbfp[(i*ntype+j)*2  ];
 +                c12 = nbfp[(i*ntype+j)*2+1];
 +                nbat->nbfp[(i*nbat->ntype+j)*2  ] = c6;
 +                nbat->nbfp[(i*nbat->ntype+j)*2+1] = c12;
 +
 +                /* Compare 6*C6 and 12*C12 for geometric cobination rule */
 +                bCombGeom = bCombGeom &&
 +                    gmx_within_tol(c6*c6, nbfp[(i*ntype+i)*2  ]*nbfp[(j*ntype+j)*2  ], tol) &&
 +                    gmx_within_tol(c12*c12, nbfp[(i*ntype+i)*2+1]*nbfp[(j*ntype+j)*2+1], tol);
 +
 +                /* Compare C6 and C12 for Lorentz-Berthelot combination rule */
 +                c6     /= 6.0;
 +                c12    /= 12.0;
 +                bCombLB = bCombLB &&
 +                    ((c6 == 0 && c12 == 0 &&
 +                      (nbat->nbfp_comb[i*2+1] == 0 || nbat->nbfp_comb[j*2+1] == 0)) ||
 +                     (c6 > 0 && c12 > 0 &&
 +                      gmx_within_tol(pow(c12/c6, 1.0/6.0), 0.5*(nbat->nbfp_comb[i*2]+nbat->nbfp_comb[j*2]), tol) &&
 +                      gmx_within_tol(0.25*c6*c6/c12, sqrt(nbat->nbfp_comb[i*2+1]*nbat->nbfp_comb[j*2+1]), tol)));
 +            }
 +            else
 +            {
 +                /* Add zero parameters for the additional dummy atom type */
 +                nbat->nbfp[(i*nbat->ntype+j)*2  ] = 0;
 +                nbat->nbfp[(i*nbat->ntype+j)*2+1] = 0;
 +            }
 +        }
 +    }
 +    if (debug)
 +    {
 +        fprintf(debug, "Combination rules: geometric %d Lorentz-Berthelot %d\n",
 +                bCombGeom, bCombLB);
 +    }
 +
 +    simple = nbnxn_kernel_pairlist_simple(nb_kernel_type);
 +
 +    if (simple)
 +    {
 +        /* We prefer the geometic combination rule,
 +         * as that gives a slightly faster kernel than the LB rule.
 +         */
 +        if (bCombGeom)
 +        {
 +            nbat->comb_rule = ljcrGEOM;
 +        }
 +        else if (bCombLB)
 +        {
 +            nbat->comb_rule = ljcrLB;
 +        }
 +        else
 +        {
 +            nbat->comb_rule = ljcrNONE;
 +
 +            nbat->free(nbat->nbfp_comb);
 +        }
 +
 +        if (fp)
 +        {
 +            if (nbat->comb_rule == ljcrNONE)
 +            {
 +                fprintf(fp, "Using full Lennard-Jones parameter combination matrix\n\n");
 +            }
 +            else
 +            {
 +                fprintf(fp, "Using %s Lennard-Jones combination rule\n\n",
 +                        nbat->comb_rule == ljcrGEOM ? "geometric" : "Lorentz-Berthelot");
 +            }
 +        }
 +
 +        set_combination_rule_data(nbat);
 +    }
 +    else
 +    {
 +        nbat->comb_rule = ljcrNONE;
 +
 +        nbat->free(nbat->nbfp_comb);
 +    }
 +
 +    nbat->natoms  = 0;
 +    nbat->type    = NULL;
 +    nbat->lj_comb = NULL;
 +    if (simple)
 +    {
 +        int pack_x;
 +
 +        switch (nb_kernel_type)
 +        {
 +            case nbnxnk4xN_SIMD_4xN:
 +            case nbnxnk4xN_SIMD_2xNN:
 +                pack_x = max(NBNXN_CPU_CLUSTER_I_SIZE,
 +                             nbnxn_kernel_to_cj_size(nb_kernel_type));
 +                switch (pack_x)
 +                {
 +                    case 4:
 +                        nbat->XFormat = nbatX4;
 +                        break;
 +                    case 8:
 +                        nbat->XFormat = nbatX8;
 +                        break;
 +                    default:
 +                        gmx_incons("Unsupported packing width");
 +                }
 +                break;
 +            default:
 +                nbat->XFormat = nbatXYZ;
 +                break;
 +        }
 +
 +        nbat->FFormat = nbat->XFormat;
 +    }
 +    else
 +    {
 +        nbat->XFormat = nbatXYZQ;
 +        nbat->FFormat = nbatXYZ;
 +    }
 +    nbat->q        = NULL;
 +    nbat->nenergrp = n_energygroups;
 +    if (!simple)
 +    {
 +        /* Energy groups not supported yet for super-sub lists */
 +        if (n_energygroups > 1 && fp != NULL)
 +        {
 +            fprintf(fp, "\nNOTE: With GPUs, reporting energy group contributions is not supported\n\n");
 +        }
 +        nbat->nenergrp = 1;
 +    }
 +    /* Temporary storage goes as #grp^3*simd_width^2/2, so limit to 64 */
 +    if (nbat->nenergrp > 64)
 +    {
 +        gmx_fatal(FARGS, "With NxN kernels not more than 64 energy groups are supported\n");
 +    }
 +    nbat->neg_2log = 1;
 +    while (nbat->nenergrp > (1<<nbat->neg_2log))
 +    {
 +        nbat->neg_2log++;
 +    }
 +    nbat->energrp = NULL;
 +    nbat->alloc((void **)&nbat->shift_vec, SHIFTS*sizeof(*nbat->shift_vec));
 +    nbat->xstride = (nbat->XFormat == nbatXYZQ ? STRIDE_XYZQ : DIM);
 +    nbat->fstride = (nbat->FFormat == nbatXYZQ ? STRIDE_XYZQ : DIM);
 +    nbat->x       = NULL;
 +
 +#ifdef GMX_NBNXN_SIMD
 +    if (simple)
 +    {
 +        /* Set the diagonal cluster pair exclusion mask setup data.
 +         * In the kernel we check 0 < j - i to generate the masks.
 +         * Here we store j - i for generating the mask for the first i,
 +         * we substract 0.5 to avoid rounding issues.
 +         * In the kernel we can subtract 1 to generate the subsequent mask.
 +         */
 +        const int simd_width = GMX_NBNXN_SIMD_BITWIDTH/(sizeof(real)*8);
 +        int       simd_4xn_diag_size, j;
 +
 +        simd_4xn_diag_size = max(NBNXN_CPU_CLUSTER_I_SIZE, simd_width);
 +        snew_aligned(nbat->simd_4xn_diag, simd_4xn_diag_size, NBNXN_MEM_ALIGN);
 +        for (j = 0; j < simd_4xn_diag_size; j++)
 +        {
 +            nbat->simd_4xn_diag[j] = j - 0.5;
 +        }
 +
 +        snew_aligned(nbat->simd_2xnn_diag, simd_width, NBNXN_MEM_ALIGN);
 +        for (j = 0; j < simd_width/2; j++)
 +        {
 +            /* The j-cluster size is half the SIMD width */
 +            nbat->simd_2xnn_diag[j]              = j - 0.5;
 +            /* The next half of the SIMD width is for i + 1 */
 +            nbat->simd_2xnn_diag[simd_width/2+j] = j - 1 - 0.5;
 +        }
 +    }
 +#endif
 +
 +    /* Initialize the output data structures */
 +    nbat->nout    = nout;
 +    snew(nbat->out, nbat->nout);
 +    nbat->nalloc  = 0;
 +    for (i = 0; i < nbat->nout; i++)
 +    {
 +        nbnxn_atomdata_output_init(&nbat->out[i],
 +                                   nb_kernel_type,
 +                                   nbat->nenergrp, 1<<nbat->neg_2log,
 +                                   nbat->alloc);
 +    }
 +    nbat->buffer_flags.flag        = NULL;
 +    nbat->buffer_flags.flag_nalloc = 0;
 +}
 +
 +static void copy_lj_to_nbat_lj_comb_x4(const real *ljparam_type,
 +                                       const int *type, int na,
 +                                       real *ljparam_at)
 +{
 +    int is, k, i;
 +
 +    /* The LJ params follow the combination rule:
 +     * copy the params for the type array to the atom array.
 +     */
 +    for (is = 0; is < na; is += PACK_X4)
 +    {
 +        for (k = 0; k < PACK_X4; k++)
 +        {
 +            i = is + k;
 +            ljparam_at[is*2        +k] = ljparam_type[type[i]*2  ];
 +            ljparam_at[is*2+PACK_X4+k] = ljparam_type[type[i]*2+1];
 +        }
 +    }
 +}
 +
 +static void copy_lj_to_nbat_lj_comb_x8(const real *ljparam_type,
 +                                       const int *type, int na,
 +                                       real *ljparam_at)
 +{
 +    int is, k, i;
 +
 +    /* The LJ params follow the combination rule:
 +     * copy the params for the type array to the atom array.
 +     */
 +    for (is = 0; is < na; is += PACK_X8)
 +    {
 +        for (k = 0; k < PACK_X8; k++)
 +        {
 +            i = is + k;
 +            ljparam_at[is*2        +k] = ljparam_type[type[i]*2  ];
 +            ljparam_at[is*2+PACK_X8+k] = ljparam_type[type[i]*2+1];
 +        }
 +    }
 +}
 +
 +/* Sets the atom type and LJ data in nbnxn_atomdata_t */
 +static void nbnxn_atomdata_set_atomtypes(nbnxn_atomdata_t    *nbat,
 +                                         int                  ngrid,
 +                                         const nbnxn_search_t nbs,
 +                                         const int           *type)
 +{
 +    int                 g, i, ncz, ash;
 +    const nbnxn_grid_t *grid;
 +
 +    for (g = 0; g < ngrid; g++)
 +    {
 +        grid = &nbs->grid[g];
 +
 +        /* Loop over all columns and copy and fill */
 +        for (i = 0; i < grid->ncx*grid->ncy; i++)
 +        {
 +            ncz = grid->cxy_ind[i+1] - grid->cxy_ind[i];
 +            ash = (grid->cell0 + grid->cxy_ind[i])*grid->na_sc;
 +
 +            copy_int_to_nbat_int(nbs->a+ash, grid->cxy_na[i], ncz*grid->na_sc,
 +                                 type, nbat->ntype-1, nbat->type+ash);
 +
 +            if (nbat->comb_rule != ljcrNONE)
 +            {
 +                if (nbat->XFormat == nbatX4)
 +                {
 +                    copy_lj_to_nbat_lj_comb_x4(nbat->nbfp_comb,
 +                                               nbat->type+ash, ncz*grid->na_sc,
 +                                               nbat->lj_comb+ash*2);
 +                }
 +                else if (nbat->XFormat == nbatX8)
 +                {
 +                    copy_lj_to_nbat_lj_comb_x8(nbat->nbfp_comb,
 +                                               nbat->type+ash, ncz*grid->na_sc,
 +                                               nbat->lj_comb+ash*2);
 +                }
 +            }
 +        }
 +    }
 +}
 +
 +/* Sets the charges in nbnxn_atomdata_t *nbat */
 +static void nbnxn_atomdata_set_charges(nbnxn_atomdata_t    *nbat,
 +                                       int                  ngrid,
 +                                       const nbnxn_search_t nbs,
 +                                       const real          *charge)
 +{
 +    int                 g, cxy, ncz, ash, na, na_round, i, j;
 +    real               *q;
 +    const nbnxn_grid_t *grid;
 +
 +    for (g = 0; g < ngrid; g++)
 +    {
 +        grid = &nbs->grid[g];
 +
 +        /* Loop over all columns and copy and fill */
 +        for (cxy = 0; cxy < grid->ncx*grid->ncy; cxy++)
 +        {
 +            ash      = (grid->cell0 + grid->cxy_ind[cxy])*grid->na_sc;
 +            na       = grid->cxy_na[cxy];
 +            na_round = (grid->cxy_ind[cxy+1] - grid->cxy_ind[cxy])*grid->na_sc;
 +
 +            if (nbat->XFormat == nbatXYZQ)
 +            {
 +                q = nbat->x + ash*STRIDE_XYZQ + ZZ + 1;
 +                for (i = 0; i < na; i++)
 +                {
 +                    *q = charge[nbs->a[ash+i]];
 +                    q += STRIDE_XYZQ;
 +                }
 +                /* Complete the partially filled last cell with zeros */
 +                for (; i < na_round; i++)
 +                {
 +                    *q = 0;
 +                    q += STRIDE_XYZQ;
 +                }
 +            }
 +            else
 +            {
 +                q = nbat->q + ash;
 +                for (i = 0; i < na; i++)
 +                {
 +                    *q = charge[nbs->a[ash+i]];
 +                    q++;
 +                }
 +                /* Complete the partially filled last cell with zeros */
 +                for (; i < na_round; i++)
 +                {
 +                    *q = 0;
 +                    q++;
 +                }
 +            }
 +        }
 +    }
 +}
 +
 +/* Copies the energy group indices to a reordered and packed array */
 +static void copy_egp_to_nbat_egps(const int *a, int na, int na_round,
 +                                  int na_c, int bit_shift,
 +                                  const int *in, int *innb)
 +{
 +    int i, j, sa, at;
 +    int comb;
 +
 +    j = 0;
 +    for (i = 0; i < na; i += na_c)
 +    {
 +        /* Store na_c energy group numbers into one int */
 +        comb = 0;
 +        for (sa = 0; sa < na_c; sa++)
 +        {
 +            at = a[i+sa];
 +            if (at >= 0)
 +            {
 +                comb |= (GET_CGINFO_GID(in[at]) << (sa*bit_shift));
 +            }
 +        }
 +        innb[j++] = comb;
 +    }
 +    /* Complete the partially filled last cell with fill */
 +    for (; i < na_round; i += na_c)
 +    {
 +        innb[j++] = 0;
 +    }
 +}
 +
 +/* Set the energy group indices for atoms in nbnxn_atomdata_t */
 +static void nbnxn_atomdata_set_energygroups(nbnxn_atomdata_t    *nbat,
 +                                            int                  ngrid,
 +                                            const nbnxn_search_t nbs,
 +                                            const int           *atinfo)
 +{
 +    int                 g, i, ncz, ash;
 +    const nbnxn_grid_t *grid;
 +
 +    for (g = 0; g < ngrid; g++)
 +    {
 +        grid = &nbs->grid[g];
 +
 +        /* Loop over all columns and copy and fill */
 +        for (i = 0; i < grid->ncx*grid->ncy; i++)
 +        {
 +            ncz = grid->cxy_ind[i+1] - grid->cxy_ind[i];
 +            ash = (grid->cell0 + grid->cxy_ind[i])*grid->na_sc;
 +
 +            copy_egp_to_nbat_egps(nbs->a+ash, grid->cxy_na[i], ncz*grid->na_sc,
 +                                  nbat->na_c, nbat->neg_2log,
 +                                  atinfo, nbat->energrp+(ash>>grid->na_c_2log));
 +        }
 +    }
 +}
 +
 +/* Sets all required atom parameter data in nbnxn_atomdata_t */
 +void nbnxn_atomdata_set(nbnxn_atomdata_t    *nbat,
 +                        int                  locality,
 +                        const nbnxn_search_t nbs,
 +                        const t_mdatoms     *mdatoms,
 +                        const int           *atinfo)
 +{
 +    int ngrid;
 +
 +    if (locality == eatLocal)
 +    {
 +        ngrid = 1;
 +    }
 +    else
 +    {
 +        ngrid = nbs->ngrid;
 +    }
 +
 +    nbnxn_atomdata_set_atomtypes(nbat, ngrid, nbs, mdatoms->typeA);
 +
 +    nbnxn_atomdata_set_charges(nbat, ngrid, nbs, mdatoms->chargeA);
 +
 +    if (nbat->nenergrp > 1)
 +    {
 +        nbnxn_atomdata_set_energygroups(nbat, ngrid, nbs, atinfo);
 +    }
 +}
 +
 +/* Copies the shift vector array to nbnxn_atomdata_t */
 +void nbnxn_atomdata_copy_shiftvec(gmx_bool          bDynamicBox,
 +                                  rvec             *shift_vec,
 +                                  nbnxn_atomdata_t *nbat)
 +{
 +    int i;
 +
 +    nbat->bDynamicBox = bDynamicBox;
 +    for (i = 0; i < SHIFTS; i++)
 +    {
 +        copy_rvec(shift_vec[i], nbat->shift_vec[i]);
 +    }
 +}
 +
 +/* Copies (and reorders) the coordinates to nbnxn_atomdata_t.
 + *
 + * locality selects the grid range: eatAll uses grids [0, nbs->ngrid),
 + * eatLocal only grid 0 and eatNonlocal grids [1, nbs->ngrid); for any
 + * other value the range stays empty.
 + * When FillLocal is set, nbat->natoms_local is updated from grid 0 and
 + * the columns of grid 0 are copied with a fill count covering the whole
 + * column, instead of only the real atoms.
 + * The columns of each grid are divided over the emntPairsearch OpenMP
 + * threads; each thread calls copy_rvec_to_nbat_real() for its own columns.
 + */
 +void nbnxn_atomdata_copy_x_to_nbat_x(const nbnxn_search_t nbs,
 +                                     int                  locality,
 +                                     gmx_bool             FillLocal,
 +                                     rvec                *x,
 +                                     nbnxn_atomdata_t    *nbat)
 +{
 +    int g0 = 0, g1 = 0; /* grid range [g0, g1) */
 +    int nth, th;
 +
 +    switch (locality)
 +    {
 +        case eatAll:
 +            g0 = 0;
 +            g1 = nbs->ngrid;
 +            break;
 +        case eatLocal:
 +            g0 = 0;
 +            g1 = 1;
 +            break;
 +        case eatNonlocal:
 +            g0 = 1;
 +            g1 = nbs->ngrid;
 +            break;
 +    }
 +
 +    if (FillLocal)
 +    {
 +        nbat->natoms_local = nbs->grid[0].nc*nbs->grid[0].na_sc;
 +    }
 +
 +    nth = gmx_omp_nthreads_get(emntPairsearch);
 +
 +#pragma omp parallel for num_threads(nth) schedule(static)
 +    for (th = 0; th < nth; th++)
 +    {
 +        int g;
 +
 +        for (g = g0; g < g1; g++)
 +        {
 +            const nbnxn_grid_t *grid;
 +            int                 cxy0, cxy1, cxy;
 +
 +            grid = &nbs->grid[g];
 +
 +            cxy0 = (grid->ncx*grid->ncy* th   +nth-1)/nth; /* first column of this thread, rounded up */
 +            cxy1 = (grid->ncx*grid->ncy*(th+1)+nth-1)/nth; /* one past the last column of this thread */
 +
 +            for (cxy = cxy0; cxy < cxy1; cxy++)
 +            {
 +                int na, ash, na_fill;
 +
 +                na  = grid->cxy_na[cxy];
 +                ash = (grid->cell0 + grid->cxy_ind[cxy])*grid->na_sc; /* atom offset of this column */
 +
 +                if (g == 0 && FillLocal)
 +                {
 +                    na_fill =
 +                        (grid->cxy_ind[cxy+1] - grid->cxy_ind[cxy])*grid->na_sc;
 +                }
 +                else
 +                {
 +                    /* We fill only the real particle locations.
 +                     * We assume the filling entries at the end have been
 +                     * properly set before during ns.
 +                     */
 +                    na_fill = na;
 +                }
 +                copy_rvec_to_nbat_real(nbs->a+ash, na, na_fill, x,
 +                                       nbat->XFormat, nbat->x, ash,
 +                                       0, 0, 0);
 +            }
 +        }
 +    }
 +}
 +
 +static void
 +nbnxn_atomdata_clear_reals(real * gmx_restrict dest,
 +                           int i0, int i1)
 +{
 +    int i;
 +
 +    for (i = i0; i < i1; i++)
 +    {
 +        dest[i] = 0;
 +    }
 +}
 +
 +static void
 +nbnxn_atomdata_reduce_reals(real * gmx_restrict dest,
 +                            gmx_bool bDestSet,
 +                            real ** gmx_restrict src,
 +                            int nsrc,
 +                            int i0, int i1)
 +{
 +    int i, s;
 +
 +    if (bDestSet)
 +    {
 +        /* The destination buffer contains data, add to it */
 +        for (i = i0; i < i1; i++)
 +        {
 +            for (s = 0; s < nsrc; s++)
 +            {
 +                dest[i] += src[s][i];
 +            }
 +        }
 +    }
 +    else
 +    {
 +        /* The destination buffer is unitialized, set it first */
 +        for (i = i0; i < i1; i++)
 +        {
 +            dest[i] = src[0][i];
 +            for (s = 1; s < nsrc; s++)
 +            {
 +                dest[i] += src[s][i];
 +            }
 +        }
 +    }
 +}
 +
 +static void
 +nbnxn_atomdata_reduce_reals_simd(real * gmx_restrict dest,
 +                                 gmx_bool bDestSet,
 +                                 real ** gmx_restrict src,
 +                                 int nsrc,
 +                                 int i0, int i1)
 +{
 +#ifdef GMX_NBNXN_SIMD
 +/* SIMD version of the reduction of nsrc source buffers into dest over
 + * the index range [i0, i1): with bDestSet the sources are added to the
 + * existing content of dest, otherwise dest is set to the sum of the
 + * sources, starting from src[0]. The range is processed in steps of
 + * GMX_SIMD_WIDTH_HERE, without a scalar remainder loop.
 + * NOTE(review): presumably this requires suitably aligned buffers and a
 + * range length that is a multiple of the SIMD width - confirm with the
 + * definitions in gmx_simd_macros.h and the callers.
 + * Without GMX_NBNXN_SIMD the function body is empty.
 + *
 + * The SIMD width here is actually independent of that in the kernels,
 + * but we use the same width for simplicity (usually optimal anyhow).
 + */
 +#if GMX_NBNXN_SIMD_BITWIDTH == 128
 +#define GMX_MM128_HERE
 +#endif
 +#if GMX_NBNXN_SIMD_BITWIDTH == 256
 +#define GMX_MM256_HERE
 +#endif
 +#include "gmx_simd_macros.h"
 +
 +    int       i, s;
 +    gmx_mm_pr dest_SSE, src_SSE;
 +
 +    if (bDestSet)
 +    {
 +        for (i = i0; i < i1; i += GMX_SIMD_WIDTH_HERE)
 +        {
 +            dest_SSE = gmx_load_pr(dest+i); /* start from the existing destination data */
 +            for (s = 0; s < nsrc; s++)
 +            {
 +                src_SSE  = gmx_load_pr(src[s]+i);
 +                dest_SSE = gmx_add_pr(dest_SSE, src_SSE);
 +            }
 +            gmx_store_pr(dest+i, dest_SSE);
 +        }
 +    }
 +    else
 +    {
 +        for (i = i0; i < i1; i += GMX_SIMD_WIDTH_HERE)
 +        {
 +            dest_SSE = gmx_load_pr(src[0]+i); /* destination not set: start from the first source */
 +            for (s = 1; s < nsrc; s++)
 +            {
 +                src_SSE  = gmx_load_pr(src[s]+i);
 +                dest_SSE = gmx_add_pr(dest_SSE, src_SSE);
 +            }
 +            gmx_store_pr(dest+i, dest_SSE);
 +        }
 +    }
 +
 +#undef GMX_MM128_HERE
 +#undef GMX_MM256_HERE
 +#endif
 +}
 +
 +/* Add part of the force array(s) from nbnxn_atomdata_t to f */
 +static void
 +nbnxn_atomdata_add_nbat_f_to_f_part(const nbnxn_search_t nbs,
 +                                    const nbnxn_atomdata_t *nbat,
 +                                    nbnxn_atomdata_output_t *out,
 +                                    int nfa,
 +                                    int a0, int a1,
 +                                    rvec *f)
 +{
 +    int         a, i, fa;
 +    const int  *cell;
 +    const real *fnb;
 +
 +    cell = nbs->cell;
 +
 +    /* Loop over all columns and copy and fill */
 +    switch (nbat->FFormat)
 +    {
 +        case nbatXYZ:
 +        case nbatXYZQ:
 +            if (nfa == 1)
 +            {
 +                fnb = out[0].f;
 +
 +                for (a = a0; a < a1; a++)
 +                {
 +                    i = cell[a]*nbat->fstride;
 +
 +                    f[a][XX] += fnb[i];
 +                    f[a][YY] += fnb[i+1];
 +                    f[a][ZZ] += fnb[i+2];
 +                }
 +            }
 +            else
 +            {
 +                for (a = a0; a < a1; a++)
 +                {
 +                    i = cell[a]*nbat->fstride;
 +
 +                    for (fa = 0; fa < nfa; fa++)
 +                    {
 +                        f[a][XX] += out[fa].f[i];
 +                        f[a][YY] += out[fa].f[i+1];
 +                        f[a][ZZ] += out[fa].f[i+2];
 +                    }
 +                }
 +            }
 +            break;
 +        case nbatX4:
 +            if (nfa == 1)
 +            {
 +                fnb = out[0].f;
 +
 +                for (a = a0; a < a1; a++)
 +                {
 +                    i = X4_IND_A(cell[a]);
 +
 +                    f[a][XX] += fnb[i+XX*PACK_X4];
 +                    f[a][YY] += fnb[i+YY*PACK_X4];
 +                    f[a][ZZ] += fnb[i+ZZ*PACK_X4];
 +                }
 +            }
 +            else
 +            {
 +                for (a = a0; a < a1; a++)
 +                {
 +                    i = X4_IND_A(cell[a]);
 +
 +                    for (fa = 0; fa < nfa; fa++)
 +                    {
 +                        f[a][XX] += out[fa].f[i+XX*PACK_X4];
 +                        f[a][YY] += out[fa].f[i+YY*PACK_X4];
 +                        f[a][ZZ] += out[fa].f[i+ZZ*PACK_X4];
 +                    }
 +                }
 +            }
 +            break;
 +        case nbatX8:
 +            if (nfa == 1)
 +            {
 +                fnb = out[0].f;
 +
 +                for (a = a0; a < a1; a++)
 +                {
 +                    i = X8_IND_A(cell[a]);
 +
 +                    f[a][XX] += fnb[i+XX*PACK_X8];
 +                    f[a][YY] += fnb[i+YY*PACK_X8];
 +                    f[a][ZZ] += fnb[i+ZZ*PACK_X8];
 +                }
 +            }
 +            else
 +            {
 +                for (a = a0; a < a1; a++)
 +                {
 +                    i = X8_IND_A(cell[a]);
 +
 +                    for (fa = 0; fa < nfa; fa++)
 +                    {
 +                        f[a][XX] += out[fa].f[i+XX*PACK_X8];
 +                        f[a][YY] += out[fa].f[i+YY*PACK_X8];
 +                        f[a][ZZ] += out[fa].f[i+ZZ*PACK_X8];
 +                    }
 +                }
 +            }
 +            break;
++    default:
++        gmx_incons("Unsupported nbnxn_atomdata_t format");
 +    }
 +}
 +
 +/* Add the force array(s) from nbnxn_atomdata_t to f.
 + *
 + * locality selects the atom range: eatAll covers atoms
 + * [0, nbs->natoms_nonlocal), eatLocal [0, nbs->natoms_local) and
 + * eatNonlocal [nbs->natoms_local, nbs->natoms_nonlocal); for any other
 + * value the range stays empty.
 + * With more than one output buffer, which is only accepted together with
 + * eatAll, the buffers are first reduced into buffer 0, guided by the
 + * per-block bit flags in nbat->buffer_flags. Buffer 0 is then added to f,
 + * with the atom range divided over the emntNonbonded OpenMP threads.
 + * The time spent is accounted to the enbsCCreducef cycle counter.
 + */
 +void nbnxn_atomdata_add_nbat_f_to_f(const nbnxn_search_t    nbs,
 +                                    int                     locality,
 +                                    const nbnxn_atomdata_t *nbat,
 +                                    rvec                   *f)
 +{
 +    int a0 = 0, na = 0; /* first atom and number of atoms to process */
 +    int nth, th;
 +
 +    nbs_cycle_start(&nbs->cc[enbsCCreducef]);
 +
 +    switch (locality)
 +    {
 +        case eatAll:
 +            a0 = 0;
 +            na = nbs->natoms_nonlocal;
 +            break;
 +        case eatLocal:
 +            a0 = 0;
 +            na = nbs->natoms_local;
 +            break;
 +        case eatNonlocal:
 +            a0 = nbs->natoms_local;
 +            na = nbs->natoms_nonlocal - nbs->natoms_local;
 +            break;
 +    }
 +
 +    nth = gmx_omp_nthreads_get(emntNonbonded);
 +
 +    if (nbat->nout > 1)
 +    {
 +        if (locality != eatAll)
 +        {
 +            gmx_incons("add_f_to_f called with nout>1 and locality!=eatAll");
 +        }
 +
 +        /* Reduce the force thread output buffers into buffer 0, before adding
 +         * them to the, differently ordered, "real" force buffer.
 +         */
 +#pragma omp parallel for num_threads(nth) schedule(static)
 +        for (th = 0; th < nth; th++)
 +        {
 +            const nbnxn_buffer_flags_t *flags;
 +            int   b0, b1, b;
 +            int   i0, i1;
 +            int   nfptr;
 +            real *fptr[NBNXN_BUFFERFLAG_MAX_THREADS];
 +            int   out;
 +
 +            flags = &nbat->buffer_flags;
 +
 +            /* Calculate the cell-block range for our thread */
 +            b0 = (flags->nflag* th   )/nth;
 +            b1 = (flags->nflag*(th+1))/nth;
 +
 +            for (b = b0; b < b1; b++)
 +            {
 +                i0 =  b   *NBNXN_BUFFERFLAG_SIZE*nbat->fstride; /* range of reals covered by flag block b */
 +                i1 = (b+1)*NBNXN_BUFFERFLAG_SIZE*nbat->fstride;
 +
 +                nfptr = 0;
 +                for (out = 1; out < nbat->nout; out++)
 +                {
 +                    if (flags->flag[b] & (1U<<out)) /* bit 'out' set: that buffer is included for this block */
 +                    {
 +                        fptr[nfptr++] = nbat->out[out].f;
 +                    }
 +                }
 +                if (nfptr > 0)
 +                {
 +#ifdef GMX_NBNXN_SIMD
 +                    nbnxn_atomdata_reduce_reals_simd
 +#else
 +                    nbnxn_atomdata_reduce_reals
 +#endif
 +                        (nbat->out[0].f,
 +                        flags->flag[b] & (1U<<0), /* bit 0 tells whether buffer 0 already holds data */
 +                        fptr, nfptr,
 +                        i0, i1);
 +                }
 +                else if (!(flags->flag[b] & (1U<<0)))
 +                {
 +                    nbnxn_atomdata_clear_reals(nbat->out[0].f, /* no buffer flagged for this block: zero buffer 0 */
 +                                               i0, i1);
 +                }
 +            }
 +        }
 +    }
 +
 +#pragma omp parallel for num_threads(nth) schedule(static)
 +    for (th = 0; th < nth; th++)
 +    {
 +        nbnxn_atomdata_add_nbat_f_to_f_part(nbs, nbat,
 +                                            nbat->out,
 +                                            1, /* only buffer 0 is added */
 +                                            a0+((th+0)*na)/nth,
 +                                            a0+((th+1)*na)/nth,
 +                                            f);
 +    }
 +
 +    nbs_cycle_stop(&nbs->cc[enbsCCreducef]);
 +}
 +
 +/* Adds the shift forces from nbnxn_atomdata_t to fshift */
 +void nbnxn_atomdata_add_nbat_fshift_to_fshift(const nbnxn_atomdata_t *nbat,
 +                                              rvec                   *fshift)
 +{
 +    const nbnxn_atomdata_output_t *out;
 +    int  th;
 +    int  s;
 +    rvec sum;
 +
 +    out = nbat->out;
 +
 +    for (s = 0; s < SHIFTS; s++)
 +    {
 +        clear_rvec(sum);
 +        for (th = 0; th < nbat->nout; th++)
 +        {
 +            sum[XX] += out[th].fshift[s*DIM+XX];
 +            sum[YY] += out[th].fshift[s*DIM+YY];
 +            sum[ZZ] += out[th].fshift[s*DIM+ZZ];
 +        }
 +        rvec_inc(fshift[s], sum);
 +    }
 +}
index 3940ba87ade22e868805146468df9308a409aed7,0000000000000000000000000000000000000000..3475ad96da5f9a648271373bbd62aaf5952d4996
mode 100644,000000..100644
--- /dev/null
@@@ -1,908 -1,0 +1,923 @@@
- static void md_print_warn(FILE *fplog, const char *buf)
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + *          GROningen MAchine for Chemical Simulations
 + *
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2012, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 + *
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + *
 + * For more info, check our website at http://www.gromacs.org
 + *
 + * And Hey:
 + * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <stdlib.h>
 +#include <stdio.h>
 +#include <assert.h>
 +
 +#include <cuda.h>
 +
 +#include "gmx_fatal.h"
 +#include "smalloc.h"
 +#include "tables.h"
 +#include "typedefs.h"
 +#include "types/nb_verlet.h"
 +#include "types/interaction_const.h"
 +#include "types/force_flags.h"
 +#include "../nbnxn_consts.h"
 +
 +#include "nbnxn_cuda_types.h"
 +#include "../../gmxlib/cuda_tools/cudautils.cuh"
 +#include "nbnxn_cuda_data_mgmt.h"
 +#include "pmalloc_cuda.h"
 +#include "gpu_utils.h"
 +
 +static bool bUseCudaEventBlockingSync = false; /* makes the CPU thread block */
 +
 +/* Heuristically determined factor for the Fermi architecture: the minimum
 + * balanced number of ci list entries is obtained by multiplying this constant
 + * with the number of multiprocessors on the current device.
 + */
 +static unsigned int gpu_min_ci_balanced_factor = 40;
 +
 +/* Functions from nbnxn_cuda.cu */
 +extern void nbnxn_cuda_set_cacheconfig(cuda_dev_info_t *devinfo);
 +extern const struct texture<float, 1, cudaReadModeElementType>& nbnxn_cuda_get_nbfp_texref();
 +extern const struct texture<float, 1, cudaReadModeElementType>& nbnxn_cuda_get_coulomb_tab_texref();
 +
 +/* We should actually be using md_print_warn in md_logging.c,
 + * but we can't include mpi.h in CUDA code.
 + *
 + * Prints the printf-style message fmt, with a newline before and after it,
 + * to both stderr and fplog. Nothing is printed when fplog is NULL.
 + */
-         fprintf(stderr, "\n%s\n", buf);
-         fprintf(fplog,  "\n%s\n", buf);
++static void md_print_warn(FILE       *fplog,
++                          const char *fmt, ...)
 +{
++    va_list ap;
++
 +    if (fplog != NULL)
 +    {
 +        /* We should only print to stderr on the master node,
 +         * in most cases fplog is only set on the master node, so this works.
 +         */
-  *  - CUDA version
++        va_start(ap, fmt);
++        fprintf(stderr, "\n");
++        vfprintf(stderr, fmt, ap);
++        fprintf(stderr, "\n");
++        va_end(ap);
++
++        /* The argument list is restarted because the first vfprintf consumed it */
++        va_start(ap, fmt);
++        fprintf(fplog, "\n");
++        vfprintf(fplog, fmt, ap);
++        fprintf(fplog, "\n");
++        va_end(ap);
 +    }
 +}
 +
++
 +/* Fw. decl. */
 +static void nbnxn_cuda_clear_e_fshift(nbnxn_cuda_ptr_t cu_nb);
 +
 +
 +/*! Tabulates the Ewald Coulomb force and initializes the size/scale
 +    and the table GPU array. If called with an already allocated table,
 +    it just re-uploads the table.
 + */
 +static void init_ewald_coulomb_force_table(cu_nbparam_t *nbp)
 +{
 +    float       *ftmp, *coul_tab;
 +    int         tabsize;
 +    double      tabscale;
 +    cudaError_t stat;
 +
 +    tabsize     = GPU_EWALD_COULOMB_FORCE_TABLE_SIZE;
 +    /* Subtract 2 iso 1 to avoid access out of range due to rounding */
 +    tabscale    = (tabsize - 2) / sqrt(nbp->rcoulomb_sq);
 +
 +    pmalloc((void**)&ftmp, tabsize*sizeof(*ftmp));
 +
 +    table_spline3_fill_ewald_lr(ftmp, NULL, NULL, tabsize,
 +                                1/tabscale, nbp->ewald_beta);
 +
 +    /* If the table pointer == NULL the table is generated the first time =>
 +       the array pointer will be saved to nbparam and the texture is bound.
 +     */
 +    coul_tab = nbp->coulomb_tab;
 +    if (coul_tab == NULL)
 +    {
 +        stat = cudaMalloc((void **)&coul_tab, tabsize*sizeof(*coul_tab));
 +        CU_RET_ERR(stat, "cudaMalloc failed on coul_tab");
 +
 +        nbp->coulomb_tab = coul_tab;
 +
 +        cudaChannelFormatDesc cd   = cudaCreateChannelDesc<float>();
 +        stat = cudaBindTexture(NULL, &nbnxn_cuda_get_coulomb_tab_texref(),
 +                               coul_tab, &cd, tabsize*sizeof(*coul_tab));
 +        CU_RET_ERR(stat, "cudaBindTexture on coul_tab failed");
 +    }
 +
 +    cu_copy_H2D(coul_tab, ftmp, tabsize*sizeof(*coul_tab));
 +
 +    nbp->coulomb_tab_size     = tabsize;
 +    nbp->coulomb_tab_scale    = tabscale;
 +
 +    pfree(ftmp);
 +}
 +
 +
 +/*! Initializes the atomdata structure first time, it only gets filled at
 +    pair-search. */
 +static void init_atomdata_first(cu_atomdata_t *ad, int ntypes)
 +{
 +    cudaError_t stat;
 +
 +    ad->ntypes  = ntypes;
 +    stat = cudaMalloc((void**)&ad->shift_vec, SHIFTS*sizeof(*ad->shift_vec));
 +    CU_RET_ERR(stat, "cudaMalloc failed on ad->shift_vec");
 +    ad->bShiftVecUploaded = false;
 +
 +    stat = cudaMalloc((void**)&ad->fshift, SHIFTS*sizeof(*ad->fshift));
 +    CU_RET_ERR(stat, "cudaMalloc failed on ad->fshift");
 +
 +    stat = cudaMalloc((void**)&ad->e_lj, sizeof(*ad->e_lj));
 +    CU_RET_ERR(stat, "cudaMalloc failed on ad->e_lj");
 +    stat = cudaMalloc((void**)&ad->e_el, sizeof(*ad->e_el));
 +    CU_RET_ERR(stat, "cudaMalloc failed on ad->e_el");
 +
 +    /* initialize to NULL poiters to data that is not allocated here and will
 +       need reallocation in nbnxn_cuda_init_atomdata */
 +    ad->xq = NULL;
 +    ad->f  = NULL;
 +
 +    /* size -1 indicates that the respective array hasn't been initialized yet */
 +    ad->natoms = -1;
 +    ad->nalloc = -1;
 +}
 +
 +/*! Initializes the nonbonded parameter data structure. */
 +static void init_nbparam(cu_nbparam_t *nbp,
 +                         const interaction_const_t *ic,
 +                         const nonbonded_verlet_t *nbv)
 +{
 +    cudaError_t stat;
 +    int         ntypes, nnbfp;
 +
 +    ntypes  = nbv->grp[0].nbat->ntype;
 +
 +    nbp->ewald_beta = ic->ewaldcoeff;
 +    nbp->sh_ewald   = ic->sh_ewald;
 +    nbp->epsfac     = ic->epsfac;
 +    nbp->two_k_rf   = 2.0 * ic->k_rf;
 +    nbp->c_rf       = ic->c_rf;
 +    nbp->rvdw_sq    = ic->rvdw * ic->rvdw;
 +    nbp->rcoulomb_sq= ic->rcoulomb * ic->rcoulomb;
 +    nbp->rlist_sq   = ic->rlist * ic->rlist;
 +    nbp->sh_invrc6  = ic->sh_invrc6;
 +
 +    if (ic->eeltype == eelCUT)
 +    {
 +        nbp->eeltype = eelCuCUT;
 +    }
 +    else if (EEL_RF(ic->eeltype))
 +    {
 +        nbp->eeltype = eelCuRF;
 +    }
 +    else if ((EEL_PME(ic->eeltype) || ic->eeltype==eelEWALD))
 +    {
 +        /* Initially rcoulomb == rvdw, so it's surely not twin cut-off, unless
 +           forced by the env. var. (used only for benchmarking). */
 +        if (getenv("GMX_CUDA_NB_EWALD_TWINCUT") == NULL)
 +        {
 +            nbp->eeltype = eelCuEWALD;
 +        }
 +        else
 +        {
 +            nbp->eeltype = eelCuEWALD_TWIN;
 +        }
 +    }
 +    else
 +    {
 +        /* Shouldn't happen, as this is checked when choosing Verlet-scheme */
 +        gmx_incons("The requested electrostatics type is not implemented in the CUDA GPU accelerated kernels!");
 +    }
 +
 +    /* generate table for PME */
 +    if (nbp->eeltype == eelCuEWALD)
 +    {
 +        nbp->coulomb_tab = NULL;
 +        init_ewald_coulomb_force_table(nbp);
 +    }
 +
 +    nnbfp = 2*ntypes*ntypes;
 +    stat = cudaMalloc((void **)&nbp->nbfp, nnbfp*sizeof(*nbp->nbfp));
 +    CU_RET_ERR(stat, "cudaMalloc failed on nbp->nbfp");
 +    cu_copy_H2D(nbp->nbfp, nbv->grp[0].nbat->nbfp, nnbfp*sizeof(*nbp->nbfp));
 +
 +    cudaChannelFormatDesc cd   = cudaCreateChannelDesc<float>();
 +    stat = cudaBindTexture(NULL, &nbnxn_cuda_get_nbfp_texref(),
 +                           nbp->nbfp, &cd, nnbfp*sizeof(*nbp->nbfp));
 +    CU_RET_ERR(stat, "cudaBindTexture on nbfp failed");
 +}
 +
 +/*! Re-generate the GPU Ewald force table, resets rlist, and update the
 + *  electrostatic type switching to twin cut-off (or back) if needed. */
 +void nbnxn_cuda_pme_loadbal_update_param(nbnxn_cuda_ptr_t cu_nb,
 +                                         const interaction_const_t *ic)
 +{
 +    cu_nbparam_t *nbp = cu_nb->nbparam;
 +
 +    nbp->rlist_sq       = ic->rlist * ic->rlist;
 +    nbp->rcoulomb_sq    = ic->rcoulomb * ic->rcoulomb;
 +    nbp->ewald_beta     = ic->ewaldcoeff;
 +
 +    /* When switching to/from twin cut-off, the electrostatics type needs updating.
 +       (The env. var. that forces twin cut-off is for benchmarking only!) */
 +    if (ic->rcoulomb == ic->rvdw &&
 +        getenv("GMX_CUDA_NB_EWALD_TWINCUT") == NULL)
 +    {
 +        nbp->eeltype = eelCuEWALD;
 +    }
 +    else
 +    {
 +        nbp->eeltype = eelCuEWALD_TWIN;
 +    }
 +
 +    init_ewald_coulomb_force_table(cu_nb->nbparam);
 +}
 +
 +/*! Initializes the pair list data structure. */
 +static void init_plist(cu_plist_t *pl)
 +{
 +    /* initialize to NULL pointers to data that is not allocated here and will
 +       need reallocation in nbnxn_cuda_init_pairlist */
 +    pl->sci     = NULL;
 +    pl->cj4     = NULL;
 +    pl->excl    = NULL;
 +
 +    /* size -1 indicates that the respective array hasn't been initialized yet */
 +    pl->na_c        = -1;
 +    pl->nsci        = -1;
 +    pl->sci_nalloc  = -1;
 +    pl->ncj4        = -1;
 +    pl->cj4_nalloc  = -1;
 +    pl->nexcl       = -1;
 +    pl->excl_nalloc = -1;
 +    pl->bDoPrune    = false;
 +}
 +
 +/*! Initializes the timer data structure. */
 +static void init_timers(cu_timers_t *t, bool bUseTwoStreams)
 +{
 +    cudaError_t stat;
 +    int eventflags = ( bUseCudaEventBlockingSync ? cudaEventBlockingSync: cudaEventDefault );
 +
 +    stat = cudaEventCreateWithFlags(&(t->start_atdat), eventflags);
 +    CU_RET_ERR(stat, "cudaEventCreate on start_atdat failed");
 +    stat = cudaEventCreateWithFlags(&(t->stop_atdat), eventflags);
 +    CU_RET_ERR(stat, "cudaEventCreate on stop_atdat failed");
 +
 +    /* The non-local counters/stream (second in the array) are needed only with DD. */
 +    for (int i = 0; i <= (bUseTwoStreams ? 1 : 0); i++)
 +    {
 +        stat = cudaEventCreateWithFlags(&(t->start_nb_k[i]), eventflags);
 +        CU_RET_ERR(stat, "cudaEventCreate on start_nb_k failed");
 +        stat = cudaEventCreateWithFlags(&(t->stop_nb_k[i]), eventflags);
 +        CU_RET_ERR(stat, "cudaEventCreate on stop_nb_k failed");
 +
 +
 +        stat = cudaEventCreateWithFlags(&(t->start_pl_h2d[i]), eventflags);
 +        CU_RET_ERR(stat, "cudaEventCreate on start_pl_h2d failed");
 +        stat = cudaEventCreateWithFlags(&(t->stop_pl_h2d[i]), eventflags);
 +        CU_RET_ERR(stat, "cudaEventCreate on stop_pl_h2d failed");
 +
 +        stat = cudaEventCreateWithFlags(&(t->start_nb_h2d[i]), eventflags);
 +        CU_RET_ERR(stat, "cudaEventCreate on start_nb_h2d failed");
 +        stat = cudaEventCreateWithFlags(&(t->stop_nb_h2d[i]), eventflags);
 +        CU_RET_ERR(stat, "cudaEventCreate on stop_nb_h2d failed");
 +
 +        stat = cudaEventCreateWithFlags(&(t->start_nb_d2h[i]), eventflags);
 +        CU_RET_ERR(stat, "cudaEventCreate on start_nb_d2h failed");
 +        stat = cudaEventCreateWithFlags(&(t->stop_nb_d2h[i]), eventflags);
 +        CU_RET_ERR(stat, "cudaEventCreate on stop_nb_d2h failed");
 +    }
 +}
 +
 +/*! Initializes the timings data structure. */
 +static void init_timings(wallclock_gpu_t *t)
 +{
 +    int i, j;
 +
 +    t->nb_h2d_t = 0.0;
 +    t->nb_d2h_t = 0.0;
 +    t->nb_c    = 0;
 +    t->pl_h2d_t = 0.0;
 +    t->pl_h2d_c = 0;
 +    for (i = 0; i < 2; i++)
 +    {
 +        for(j = 0; j < 2; j++)
 +        {
 +            t->ktime[i][j].t = 0.0;
 +            t->ktime[i][j].c = 0;
 +        }
 +    }
 +}
 +
 +/* Decide which kernel version to use (default or legacy) based on:
-  *  - GPU SM version TODO ???
++ *  - CUDA version used for compilation
 + *  - non-bonded kernel selector environment variables
- static int pick_nbnxn_kernel_version()
++ *  - GPU architecture version
 + */
-     bool bLegacyKernel, bDefaultKernel, bCUDA40, bCUDA32;
++/* fplog is handed to md_print_warn for the notes printed below and devinfo
++ * supplies the device properties (prop.major is checked). Returns either
++ * eNbnxnCuKDefault or eNbnxnCuKLegacy; gmx_fatal is called when both
++ * GMX_CUDA_NB_DEFAULT and GMX_CUDA_NB_LEGACY are set.
++ */
++static int pick_nbnxn_kernel_version(FILE            *fplog,
++                                     cuda_dev_info_t *devinfo)
 +{
-     /* legacy kernel (former k2), kept for now for backward compatibility,
-        faster than the default with  CUDA 3.2/4.0 (TODO: on Kepler?). */
-     bLegacyKernel  = (getenv("GMX_CUDA_NB_LEGACY") != NULL);
++    bool bForceLegacyKernel, bForceDefaultKernel, bCUDA40, bCUDA32;
 +    char sbuf[STRLEN];
 +    int  kver;
 +
-     bDefaultKernel = (getenv("GMX_CUDA_NB_DEFAULT") != NULL);
++    /* Legacy kernel (former k2), kept for backward compatibility as it is
++       faster than the default with CUDA 3.2/4.0 on Fermi (not on Kepler). */
++    bForceLegacyKernel  = (getenv("GMX_CUDA_NB_LEGACY") != NULL);
 +    /* default kernel (former k3). */
-     if ((unsigned)(bLegacyKernel + bDefaultKernel) > 1)
++    bForceDefaultKernel = (getenv("GMX_CUDA_NB_DEFAULT") != NULL);
 +
-     if (bCUDA32 || bCUDA40)
++    if ((unsigned)(bForceLegacyKernel + bForceDefaultKernel) > 1)
 +    {
 +        gmx_fatal(FARGS, "Multiple CUDA non-bonded kernels requested; to manually pick a kernel set only one \n"
 +                  "of the following environment variables: \n"
 +                  "GMX_CUDA_NB_DEFAULT, GMX_CUDA_NB_LEGACY");
 +    }
 +
++    /* sbuf is only filled in for CUDA 3.2/4.0 builds; it is only read in the
++       branch below that requires bCUDA32 or bCUDA40 to be set */
 +    bCUDA32 = bCUDA40 = false;
 +#if CUDA_VERSION == 3200
 +    bCUDA32 = true;
 +    sprintf(sbuf, "3.2");
 +#elif CUDA_VERSION == 4000
 +    bCUDA40 = true;
 +    sprintf(sbuf, "4.0");
 +#endif
 +
 +    /* default is default ;) */
 +    kver = eNbnxnCuKDefault;
 +
-         if (bDefaultKernel)
++    /* Consider switching to legacy kernels only on Fermi */
++    if (devinfo->prop.major < 3 && (bCUDA32 || bCUDA40))
 +    {
 +        /* use legacy kernel unless something else is forced by an env. var */
-             fprintf(stderr,
-                     "\nNOTE: CUDA %s compilation detected; with this compiler version the legacy\n"
-                     "      non-bonded kernels perform best. However, the default kernels were\n"
-                     "      selected by the GMX_CUDA_NB_DEFAULT environment variable.\n"
-                     "      For best performance upgrade your CUDA toolkit.",
-                     sbuf);
++        if (bForceDefaultKernel)
 +        {
-         /* issue not if the non-default kernel is forced by an env. var */
-         if (bLegacyKernel)
++            md_print_warn(fplog,
++                          "NOTE: CUDA %s compilation detected; with this compiler version the legacy\n"
++                          "      non-bonded kernels perform best. However, the default kernels were\n"
++                          "      selected by the GMX_CUDA_NB_DEFAULT environment variable.\n"
++                          "      For best performance upgrade your CUDA toolkit.\n",
++                          sbuf);
 +        }
 +        else
 +        {
 +            kver = eNbnxnCuKLegacy;
 +        }
 +    }
 +    else
 +    {
-             fprintf(stderr,
-                     "\nNOTE: Legacy non-bonded CUDA kernels were selected by the GMX_CUDA_NB_LEGACY\n"
++        /* issue note if the non-default kernel is forced by an env. var */
++        if (bForceLegacyKernel)
 +        {
-     nb->kernel_ver = pick_nbnxn_kernel_version();
++            md_print_warn(fplog,
++                    "NOTE: Legacy non-bonded CUDA kernels selected by the GMX_CUDA_NB_LEGACY\n"
 +                    "      env. var. Consider using using the default kernels which should be faster!\n");
 +
 +            kver = eNbnxnCuKLegacy;
 +        }
 +    }
 +
 +    return kver;
 +}
 +
 +void nbnxn_cuda_init(FILE *fplog,
 +                     nbnxn_cuda_ptr_t *p_cu_nb,
 +                     gmx_gpu_info_t *gpu_info, int my_gpu_index,
 +                     gmx_bool bLocalAndNonlocal)
 +{
 +    cudaError_t stat;
 +    nbnxn_cuda_ptr_t  nb;
 +    char sbuf[STRLEN];
 +    bool bStreamSync, bNoStreamSync, bTMPIAtomics, bX86, bOldDriver;
 +    int cuda_drv_ver;
 +
 +    assert(gpu_info);
 +
 +    if (p_cu_nb == NULL) return;
 +
 +    snew(nb, 1);
 +    snew(nb->atdat, 1);
 +    snew(nb->nbparam, 1);
 +    snew(nb->plist[eintLocal], 1);
 +    if (bLocalAndNonlocal)
 +    {
 +        snew(nb->plist[eintNonlocal], 1);
 +    }
 +
 +    nb->bUseTwoStreams = bLocalAndNonlocal;
 +
 +    snew(nb->timers, 1);
 +    snew(nb->timings, 1);
 +
 +    /* init nbst */
 +    pmalloc((void**)&nb->nbst.e_lj, sizeof(*nb->nbst.e_lj));
 +    pmalloc((void**)&nb->nbst.e_el, sizeof(*nb->nbst.e_el));
 +    pmalloc((void**)&nb->nbst.fshift, SHIFTS * sizeof(*nb->nbst.fshift));
 +
 +    init_plist(nb->plist[eintLocal]);
 +
 +    /* local/non-local GPU streams */
 +    stat = cudaStreamCreate(&nb->stream[eintLocal]);
 +    CU_RET_ERR(stat, "cudaStreamCreate on stream[eintLocal] failed");
 +    if (nb->bUseTwoStreams)
 +    {
 +        init_plist(nb->plist[eintNonlocal]);
 +        stat = cudaStreamCreate(&nb->stream[eintNonlocal]);
 +        CU_RET_ERR(stat, "cudaStreamCreate on stream[eintNonlocal] failed");
 +    }
 +
 +    /* init events for sychronization (timing disabled for performance reasons!) */
 +    stat = cudaEventCreateWithFlags(&nb->nonlocal_done, cudaEventDisableTiming);
 +    CU_RET_ERR(stat, "cudaEventCreate on nonlocal_done failed");
 +    stat = cudaEventCreateWithFlags(&nb->misc_ops_done, cudaEventDisableTiming);
 +    CU_RET_ERR(stat, "cudaEventCreate on misc_ops_one failed");
 +
 +    /* set device info, just point it to the right GPU among the detected ones */
 +    nb->dev_info = &gpu_info->cuda_dev[get_gpu_device_id(gpu_info, my_gpu_index)];
 +
 +    /* On GPUs with ECC enabled, cudaStreamSynchronize shows a large overhead
 +     * (which increases with shorter time/step) caused by a known CUDA driver bug.
 +     * To work around the issue we'll use an (admittedly fragile) memory polling
 +     * waiting to preserve performance. This requires support for atomic
 +     * operations and only works on x86/x86_64.
 +     * With polling wait event-timing also needs to be disabled.
 +     *
 +     * The overhead is greatly reduced in API v5.0 drivers and the improvement
 +     $ is independent of runtime version. Hence, with API v5.0 drivers and later
 +     * we won't switch to polling.
 +     *
 +     * NOTE: Unfortunately, this is known to fail when GPUs are shared by (t)MPI,
 +     * ranks so we will also disable it in that case.
 +     */
 +
 +    bStreamSync    = getenv("GMX_CUDA_STREAMSYNC") != NULL;
 +    bNoStreamSync  = getenv("GMX_NO_CUDA_STREAMSYNC") != NULL;
 +
 +#ifdef TMPI_ATOMICS
 +    bTMPIAtomics = true;
 +#else
 +    bTMPIAtomics = false;
 +#endif
 +
 +#if defined(i386) || defined(__x86_64__)
 +    bX86 = true;
 +#else
 +    bX86 = false;
 +#endif
 +
 +    if (bStreamSync && bNoStreamSync)
 +    {
 +        gmx_fatal(FARGS, "Conflicting environment variables: both GMX_CUDA_STREAMSYNC and GMX_NO_CUDA_STREAMSYNC defined");
 +    }
 +
 +    stat = cudaDriverGetVersion(&cuda_drv_ver);
 +    CU_RET_ERR(stat, "cudaDriverGetVersion failed");
 +    bOldDriver = (cuda_drv_ver < 5000);
 +
 +    if (nb->dev_info->prop.ECCEnabled == 1)
 +    {
 +        if (bStreamSync)
 +        {
 +            nb->bUseStreamSync = true;
 +
 +            /* only warn if polling should be used */
 +            if (bOldDriver && !gpu_info->bDevShare)
 +            {
 +                md_print_warn(fplog,
 +                              "NOTE: Using a GPU with ECC enabled and CUDA driver API version <5.0, but\n"
 +                              "      cudaStreamSynchronize waiting is forced by the GMX_CUDA_STREAMSYNC env. var.\n");
 +            }
 +        }
 +        else
 +        {
 +            /* Can/should turn of cudaStreamSynchronize wait only if
 +             *   - we're on x86/x86_64
 +             *   - atomics are available
 +             *   - GPUs are not being shared
 +             *   - and driver is old. */
 +            nb->bUseStreamSync =
 +                (bX86 && bTMPIAtomics && !gpu_info->bDevShare && bOldDriver) ?
 +                true : false;
 +
 +            if (nb->bUseStreamSync)
 +            {
 +                md_print_warn(fplog,
 +                              "NOTE: Using a GPU with ECC enabled and CUDA driver API version <5.0, known to\n"
 +                              "      cause performance loss. Switching to the alternative polling GPU waiting.\n"
 +                              "      If you encounter issues, switch back to standard GPU waiting by setting\n"
 +                              "      the GMX_CUDA_STREAMSYNC environment variable.\n");
 +            }
 +            else if (bOldDriver)
 +            {
 +                /* Tell the user that the ECC+old driver combination can be bad */
 +                sprintf(sbuf,
 +                        "NOTE: Using a GPU with ECC enabled and CUDA driver API version <5.0. A bug in this\n"
 +                        "      driver can cause performance loss.\n"
 +                        "      However, the polling waiting workaround can not be used because\n%s\n"
 +                        "      Consider updating the driver or turning ECC off.",
 +                        (!bX86 || !bTMPIAtomics) ?
 +                           "         atomic operations are not supported by the platform/CPU+compiler." :
 +                           "         GPU(s) are being oversubscribed.");
 +                md_print_warn(fplog, sbuf);
 +            }
 +        }
 +    }
 +    else
 +    {
 +        if (bNoStreamSync)
 +        {
 +            nb->bUseStreamSync = false;
 +
 +            md_print_warn(fplog,
 +                          "NOTE: Polling wait for GPU synchronization requested by GMX_NO_CUDA_STREAMSYNC\n");
 +        }
 +        else
 +        {
 +            /* no/off ECC, cudaStreamSynchronize not turned off by env. var. */
 +            nb->bUseStreamSync = true;
 +        }
 +    }
 +
 +    /* CUDA timing disabled as event timers don't work:
 +       - with multiple streams = domain-decomposition;
 +       - with the polling waiting hack (without cudaStreamSynchronize);
 +       - when turned off by GMX_DISABLE_CUDA_TIMING.
 +     */
 +    nb->bDoTime = (!nb->bUseTwoStreams && nb->bUseStreamSync &&
 +                   (getenv("GMX_DISABLE_CUDA_TIMING") == NULL));
 +
 +    if (nb->bDoTime)
 +    {
 +        init_timers(nb->timers, nb->bUseTwoStreams);
 +        init_timings(nb->timings);
 +    }
 +
 +    /* set the kernel type for the current GPU */
++    nb->kernel_ver = pick_nbnxn_kernel_version(fplog, nb->dev_info);
 +    /* pick L1 cache configuration */
 +    nbnxn_cuda_set_cacheconfig(nb->dev_info);
 +
 +    *p_cu_nb = nb;
 +
 +    if (debug)
 +    {
 +        fprintf(debug, "Initialized CUDA data structures.\n");
 +    }
 +}
 +
 +void nbnxn_cuda_init_const(nbnxn_cuda_ptr_t cu_nb,
 +                           const interaction_const_t *ic,
 +                           const nonbonded_verlet_t *nbv)
 +{
 +    init_atomdata_first(cu_nb->atdat, nbv->grp[0].nbat->ntype);
 +    init_nbparam(cu_nb->nbparam, ic, nbv);
 +
 +    /* clear energy and shift force outputs */
 +    nbnxn_cuda_clear_e_fshift(cu_nb);
 +}
 +
 +void nbnxn_cuda_init_pairlist(nbnxn_cuda_ptr_t cu_nb,
 +                              const nbnxn_pairlist_t *h_plist,
 +                              int iloc)
 +{
 +    char         sbuf[STRLEN];
 +    cudaError_t  stat;
 +    bool         bDoTime    = cu_nb->bDoTime;
 +    cudaStream_t stream     = cu_nb->stream[iloc];
 +    cu_plist_t   *d_plist   = cu_nb->plist[iloc];
 +
 +    if (d_plist->na_c < 0)
 +    {
 +        d_plist->na_c = h_plist->na_ci;
 +    }
 +    else
 +    {
 +        if (d_plist->na_c != h_plist->na_ci)
 +        {
 +            sprintf(sbuf, "In cu_init_plist: the #atoms per cell has changed (from %d to %d)",
 +                    d_plist->na_c, h_plist->na_ci);
 +            gmx_incons(sbuf);
 +        }
 +    }
 +
 +    if (bDoTime)
 +    {
 +        stat = cudaEventRecord(cu_nb->timers->start_pl_h2d[iloc], stream);
 +        CU_RET_ERR(stat, "cudaEventRecord failed");
 +    }
 +
 +    cu_realloc_buffered((void **)&d_plist->sci, h_plist->sci, sizeof(*d_plist->sci),
 +                         &d_plist->nsci, &d_plist->sci_nalloc,
 +                         h_plist->nsci,
 +                         stream, true);
 +
 +    cu_realloc_buffered((void **)&d_plist->cj4, h_plist->cj4, sizeof(*d_plist->cj4),
 +                         &d_plist->ncj4, &d_plist->cj4_nalloc,
 +                         h_plist->ncj4,
 +                         stream, true);
 +
 +    cu_realloc_buffered((void **)&d_plist->excl, h_plist->excl, sizeof(*d_plist->excl),
 +                         &d_plist->nexcl, &d_plist->excl_nalloc,
 +                         h_plist->nexcl,
 +                         stream, true);
 +
 +    if (bDoTime)
 +    {
 +        stat = cudaEventRecord(cu_nb->timers->stop_pl_h2d[iloc], stream);
 +        CU_RET_ERR(stat, "cudaEventRecord failed");
 +    }
 +
 +    /* need to prune the pair list during the next step */
 +    d_plist->bDoPrune = true;
 +}
 +
 +void nbnxn_cuda_upload_shiftvec(nbnxn_cuda_ptr_t cu_nb,
 +                                const nbnxn_atomdata_t *nbatom)
 +{
 +    cu_atomdata_t *adat = cu_nb->atdat;
 +    cudaStream_t  ls    = cu_nb->stream[eintLocal];
 +
 +    /* only if we have a dynamic box */
 +    if (nbatom->bDynamicBox || !adat->bShiftVecUploaded)
 +    {
 +        cu_copy_H2D_async(adat->shift_vec, nbatom->shift_vec, 
 +                          SHIFTS * sizeof(*adat->shift_vec), ls);
 +        adat->bShiftVecUploaded = true;
 +    }
 +}
 +
 +/*! Clears the first natoms_clear elements of the GPU nonbonded force output array. */
 +static void nbnxn_cuda_clear_f(nbnxn_cuda_ptr_t cu_nb, int natoms_clear)
 +{
 +    cudaError_t   stat;
 +    cu_atomdata_t *adat = cu_nb->atdat;
 +    cudaStream_t  ls    = cu_nb->stream[eintLocal];
 +
 +    stat = cudaMemsetAsync(adat->f, 0, natoms_clear * sizeof(*adat->f), ls);
 +    CU_RET_ERR(stat, "cudaMemsetAsync on f falied");
 +}
 +
 +/*! Clears nonbonded shift force output array and energy outputs on the GPU. */
 +static void nbnxn_cuda_clear_e_fshift(nbnxn_cuda_ptr_t cu_nb)
 +{
 +    cudaError_t   stat;
 +    cu_atomdata_t *adat = cu_nb->atdat;
 +    cudaStream_t  ls    = cu_nb->stream[eintLocal];
 +
 +    stat = cudaMemsetAsync(adat->fshift, 0, SHIFTS * sizeof(*adat->fshift), ls);
 +    CU_RET_ERR(stat, "cudaMemsetAsync on fshift falied");
 +    stat = cudaMemsetAsync(adat->e_lj, 0, sizeof(*adat->e_lj), ls);
 +    CU_RET_ERR(stat, "cudaMemsetAsync on e_lj falied");
 +    stat = cudaMemsetAsync(adat->e_el, 0, sizeof(*adat->e_el), ls);
 +    CU_RET_ERR(stat, "cudaMemsetAsync on e_el falied");
 +}
 +
 +void nbnxn_cuda_clear_outputs(nbnxn_cuda_ptr_t cu_nb, int flags)
 +{
 +    nbnxn_cuda_clear_f(cu_nb, cu_nb->atdat->natoms);
 +    /* clear shift force array and energies if the outputs were 
 +       used in the current step */
 +    if (flags & GMX_FORCE_VIRIAL)
 +    {
 +        nbnxn_cuda_clear_e_fshift(cu_nb);
 +    }
 +}
 +
 +void nbnxn_cuda_init_atomdata(nbnxn_cuda_ptr_t cu_nb,
 +                              const nbnxn_atomdata_t *nbat)
 +{
 +    cudaError_t   stat;
 +    int           nalloc, natoms;
 +    bool          realloced;
 +    bool          bDoTime   = cu_nb->bDoTime;
 +    cu_timers_t   *timers   = cu_nb->timers;
 +    cu_atomdata_t *d_atdat  = cu_nb->atdat;
 +    cudaStream_t  ls        = cu_nb->stream[eintLocal];
 +
 +    natoms = nbat->natoms;
 +    realloced = false;
 +
 +    if (bDoTime)
 +    {
 +        /* time async copy */
 +        stat = cudaEventRecord(timers->start_atdat, ls);
 +        CU_RET_ERR(stat, "cudaEventRecord failed");
 +    }
 +
 +    /* need to reallocate if we have to copy more atoms than the amount of space
 +       available and only allocate if we haven't initialized yet, i.e d_atdat->natoms == -1 */
 +    if (natoms > d_atdat->nalloc)
 +    {
 +        nalloc = over_alloc_small(natoms);
 +
 +        /* free up first if the arrays have already been initialized */
 +        if (d_atdat->nalloc != -1)
 +        {
 +            cu_free_buffered(d_atdat->f, &d_atdat->natoms, &d_atdat->nalloc);
 +            cu_free_buffered(d_atdat->xq);
 +            cu_free_buffered(d_atdat->atom_types);
 +        }
 +
 +        stat = cudaMalloc((void **)&d_atdat->f, nalloc*sizeof(*d_atdat->f));
 +        CU_RET_ERR(stat, "cudaMalloc failed on d_atdat->f");
 +        stat = cudaMalloc((void **)&d_atdat->xq, nalloc*sizeof(*d_atdat->xq));
 +        CU_RET_ERR(stat, "cudaMalloc failed on d_atdat->xq");
 +
 +        stat = cudaMalloc((void **)&d_atdat->atom_types, nalloc*sizeof(*d_atdat->atom_types));
 +        CU_RET_ERR(stat, "cudaMalloc failed on d_atdat->atom_types");
 +
 +        d_atdat->nalloc = nalloc;
 +        realloced = true;
 +    }
 +
 +    d_atdat->natoms = natoms;
 +    d_atdat->natoms_local = nbat->natoms_local;
 +
 +    /* need to clear GPU f output if realloc happened */
 +    if (realloced)
 +    {
 +        nbnxn_cuda_clear_f(cu_nb, nalloc);
 +    }
 +
 +    cu_copy_H2D_async(d_atdat->atom_types, nbat->type,
 +                      natoms*sizeof(*d_atdat->atom_types), ls);
 +
 +    if (bDoTime)
 +    {
 +        stat = cudaEventRecord(timers->stop_atdat, ls);
 +        CU_RET_ERR(stat, "cudaEventRecord failed");
 +    }
 +}
 +
 +void nbnxn_cuda_free(FILE *fplog, nbnxn_cuda_ptr_t cu_nb)
 +{
 +    cudaError_t     stat;
 +    cu_atomdata_t   *atdat;
 +    cu_nbparam_t    *nbparam;
 +    cu_plist_t      *plist, *plist_nl;
 +    cu_timers_t     *timers;
 +
 +    if (cu_nb == NULL) return;
 +
 +    atdat       = cu_nb->atdat;
 +    nbparam     = cu_nb->nbparam;
 +    plist       = cu_nb->plist[eintLocal];
 +    plist_nl    = cu_nb->plist[eintNonlocal];
 +    timers      = cu_nb->timers;
 +
 +    if (nbparam->eeltype == eelCuEWALD || nbparam->eeltype == eelCuEWALD_TWIN)
 +    {
 +      stat = cudaUnbindTexture(nbnxn_cuda_get_coulomb_tab_texref());
 +      CU_RET_ERR(stat, "cudaUnbindTexture on coulomb_tab failed");
 +      cu_free_buffered(nbparam->coulomb_tab, &nbparam->coulomb_tab_size);
 +    }
 +
 +    stat = cudaEventDestroy(cu_nb->nonlocal_done);
 +    CU_RET_ERR(stat, "cudaEventDestroy failed on timers->nonlocal_done");
 +    stat = cudaEventDestroy(cu_nb->misc_ops_done);
 +    CU_RET_ERR(stat, "cudaEventDestroy failed on timers->misc_ops_done");
 +
 +    if (cu_nb->bDoTime)
 +    {
 +        stat = cudaEventDestroy(timers->start_atdat);
 +        CU_RET_ERR(stat, "cudaEventDestroy failed on timers->start_atdat");
 +        stat = cudaEventDestroy(timers->stop_atdat);
 +        CU_RET_ERR(stat, "cudaEventDestroy failed on timers->stop_atdat");
 +
 +        /* The non-local counters/stream (second in the array) are needed only with DD. */
 +        for (int i = 0; i <= (cu_nb->bUseTwoStreams ? 1 : 0); i++)
 +        {
 +            stat = cudaEventDestroy(timers->start_nb_k[i]);
 +            CU_RET_ERR(stat, "cudaEventDestroy failed on timers->start_nb_k");
 +            stat = cudaEventDestroy(timers->stop_nb_k[i]);
 +            CU_RET_ERR(stat, "cudaEventDestroy failed on timers->stop_nb_k");
 +
 +            stat = cudaEventDestroy(timers->start_pl_h2d[i]);
 +            CU_RET_ERR(stat, "cudaEventDestroy failed on timers->start_pl_h2d");
 +            stat = cudaEventDestroy(timers->stop_pl_h2d[i]);
 +            CU_RET_ERR(stat, "cudaEventDestroy failed on timers->stop_pl_h2d");
 +
 +            stat = cudaStreamDestroy(cu_nb->stream[i]);
 +            CU_RET_ERR(stat, "cudaStreamDestroy failed on stream");
 +
 +            stat = cudaEventDestroy(timers->start_nb_h2d[i]);
 +            CU_RET_ERR(stat, "cudaEventDestroy failed on timers->start_nb_h2d");
 +            stat = cudaEventDestroy(timers->stop_nb_h2d[i]);
 +            CU_RET_ERR(stat, "cudaEventDestroy failed on timers->stop_nb_h2d");
 +
 +            stat = cudaEventDestroy(timers->start_nb_d2h[i]);
 +            CU_RET_ERR(stat, "cudaEventDestroy failed on timers->start_nb_d2h");
 +            stat = cudaEventDestroy(timers->stop_nb_d2h[i]);
 +            CU_RET_ERR(stat, "cudaEventDestroy failed on timers->stop_nb_d2h");
 +        }
 +    }
 +
 +    stat = cudaUnbindTexture(nbnxn_cuda_get_nbfp_texref());
 +    CU_RET_ERR(stat, "cudaUnbindTexture on coulomb_tab failed");
 +    cu_free_buffered(nbparam->nbfp);
 +
 +    stat = cudaFree(atdat->shift_vec);
 +    CU_RET_ERR(stat, "cudaFree failed on atdat->shift_vec");
 +    stat = cudaFree(atdat->fshift);
 +    CU_RET_ERR(stat, "cudaFree failed on atdat->fshift");
 +
 +    stat = cudaFree(atdat->e_lj);
 +    CU_RET_ERR(stat, "cudaFree failed on atdat->e_lj");
 +    stat = cudaFree(atdat->e_el);
 +    CU_RET_ERR(stat, "cudaFree failed on atdat->e_el");
 +
 +    cu_free_buffered(atdat->f, &atdat->natoms, &atdat->nalloc);
 +    cu_free_buffered(atdat->xq);
 +    cu_free_buffered(atdat->atom_types, &atdat->ntypes);
 +
 +    cu_free_buffered(plist->sci, &plist->nsci, &plist->sci_nalloc);
 +    cu_free_buffered(plist->cj4, &plist->ncj4, &plist->cj4_nalloc);
 +    cu_free_buffered(plist->excl, &plist->nexcl, &plist->excl_nalloc);
 +    if (cu_nb->bUseTwoStreams)
 +    {
 +        cu_free_buffered(plist_nl->sci, &plist_nl->nsci, &plist_nl->sci_nalloc);
 +        cu_free_buffered(plist_nl->cj4, &plist_nl->ncj4, &plist_nl->cj4_nalloc);
 +        cu_free_buffered(plist_nl->excl, &plist_nl->nexcl, &plist->excl_nalloc);
 +    }
 +
 +    sfree(atdat);
 +    sfree(nbparam);
 +    sfree(plist);
 +    if (cu_nb->bUseTwoStreams)
 +    {
 +        sfree(plist_nl);
 +    }
 +    sfree(timers);
 +    sfree(cu_nb->timings);
 +    sfree(cu_nb);
 +
 +    if (debug)
 +    {
 +        fprintf(debug, "Cleaned up CUDA data structures.\n");
 +    }
 +}
 +
 +void cu_synchstream_atdat(nbnxn_cuda_ptr_t cu_nb, int iloc)
 +{
 +    cudaError_t stat;
 +    cudaStream_t stream = cu_nb->stream[iloc];
 +
 +    stat = cudaStreamWaitEvent(stream, cu_nb->timers->stop_atdat, 0);
 +    CU_RET_ERR(stat, "cudaStreamWaitEvent failed");
 +}
 +
 +wallclock_gpu_t * nbnxn_cuda_get_timings(nbnxn_cuda_ptr_t cu_nb)
 +{
 +    return (cu_nb != NULL && cu_nb->bDoTime) ? cu_nb->timings : NULL;
 +}
 +
 +void nbnxn_cuda_reset_timings(nbnxn_cuda_ptr_t cu_nb)
 +{
 +    if (cu_nb->bDoTime)
 +    {
 +        init_timings(cu_nb->timings);
 +    }
 +}
 +
 +int nbnxn_cuda_min_ci_balanced(nbnxn_cuda_ptr_t cu_nb)
 +{
 +    return cu_nb != NULL ?
 +        gpu_min_ci_balanced_factor*cu_nb->dev_info->prop.multiProcessorCount : 0;
 +
 +}
index 9c721039b04b1514f029ca174dcb3479bc8b0cf5,0000000000000000000000000000000000000000..7e09a77b36d88645c8bf8507ae17ac02ff909b08
mode 100644,000000..100644
--- /dev/null
@@@ -1,371 -1,0 +1,371 @@@
- #define NBK_FUNC_NAME(x, y) x ## _rf_ ## y
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2009, The GROMACS Development Team
 + *
 + * Gromacs is a library for molecular simulation and trajectory analysis,
 + * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
 + * a full list of developers and information, check out http://www.gromacs.org
 + *
 + * This program is free software; you can redistribute it and/or modify it under
 + * the terms of the GNU Lesser General Public License as published by the Free
 + * Software Foundation; either version 2 of the License, or (at your option) any
 + * later version.
 + * As a special exception, you may use this file as part of a free software
 + * library without restriction.  Specifically, if other files instantiate
 + * templates or use macros or inline functions from this file, or you compile
 + * this file and link it with other files to produce an executable, this
 + * file does not by itself cause the resulting executable to be covered by
 + * the GNU Lesser General Public License.
 + *
 + * In plain-speak: do not worry about classes/macros/templates either - only
 + * changes to the library have to be LGPL, not an application linking with it.
 + *
 + * To help fund GROMACS development, we humbly ask that you cite
 + * the papers people have written on it - you can find them on the website!
 + */
 +
 +#define UNROLLI    NBNXN_CPU_CLUSTER_I_SIZE
 +#define UNROLLJ    NBNXN_CPU_CLUSTER_I_SIZE
 +
 +/* We could use nbat->xstride and nbat->fstride, but macros might be faster */
 +#define X_STRIDE   3
 +#define F_STRIDE   3
 +/* Local i-atom buffer strides */
 +#define XI_STRIDE  3
 +#define FI_STRIDE  3
 +
 +
 +/* All functionality defines are set here, except for:
 + * CALC_ENERGIES, ENERGY_GROUPS which are defined before.
 + * CHECK_EXCLS, which is set just before including the inner loop contents.
 + */
 +
 +/* We always calculate shift forces, because it's cheap anyhow */
 +#define CALC_SHIFTFORCES
 +
 +#ifdef CALC_COUL_RF
- #define NBK_FUNC_NAME(x, y) x ## _tab_ ## y
++#define NBK_FUNC_NAME(base, ene) base ## _rf_ ## ene
 +#endif
 +#ifdef CALC_COUL_TAB
 +#ifndef VDW_CUTOFF_CHECK
- #define NBK_FUNC_NAME(x, y) x ## _tab_twin_ ## y
++#define NBK_FUNC_NAME(base, ene) base ## _tab_ ## ene
 +#else
++#define NBK_FUNC_NAME(base, ene) base ## _tab_twin_ ## ene
 +#endif
 +#endif
 +
 +/* Outer loop of the nbnxn_kernel_ref kernel template.
 + *
 + * The function name is assembled by NBK_FUNC_NAME from the base name
 + * nbnxn_kernel_ref and a suffix noener / ener / energrp, selected by
 + * CALC_ENERGIES and ENERGY_GROUPS. NBK_FUNC_NAME is undefined again
 + * right after the name has been generated.
 + *
 + * nbl        pair list; its ci entries are looped over and its cj array
 + *            supplies the j-clusters of each entry
 + * nbat       atom data: x, q, type, nbfp, ntype (and energrp data with
 + *            ENERGY_GROUPS)
 + * ic         interaction constants (cut-offs, epsfac, RF or table data)
 + * shift_vec  shift vectors, indexed with the shift index of each ci entry
 + * f          force array, the accumulated i-forces are added to it
 + * fshift     shift force array, only updated when not NULL
 + * Vvdw, Vc   energy output, only present with CALC_ENERGIES
 + *
 + * The pair interactions themselves come from nbnxn_kernel_ref_inner.h,
 + * which is included once for every combination of CHECK_EXCLS,
 + * CALC_COULOMB and HALF_LJ used below.
 + */
 +static void
 +#ifndef CALC_ENERGIES
 +NBK_FUNC_NAME(nbnxn_kernel_ref, noener)
 +#else
 +#ifndef ENERGY_GROUPS
 +NBK_FUNC_NAME(nbnxn_kernel_ref, ener)
 +#else
 +NBK_FUNC_NAME(nbnxn_kernel_ref, energrp)
 +#endif
 +#endif
 +#undef NBK_FUNC_NAME
 +(const nbnxn_pairlist_t     *nbl,
 + const nbnxn_atomdata_t     *nbat,
 + const interaction_const_t  *ic,
 + rvec                       *shift_vec,
 + real                       *f
 +#ifdef CALC_SHIFTFORCES
 + ,
 + real                       *fshift
 +#endif
 +#ifdef CALC_ENERGIES
 + ,
 + real                       *Vvdw,
 + real                       *Vc
 +#endif
 +)
 +{
 +    const nbnxn_ci_t   *nbln;
 +    const nbnxn_cj_t   *l_cj;
 +    const int          *type;
 +    const real         *q;
 +    const real         *shiftvec;
 +    const real         *x;
 +    const real         *nbfp;
 +    real                rcut2;
 +#ifdef VDW_CUTOFF_CHECK
 +    real                rvdw2;
 +#endif
 +    int                 ntype2;
 +    real                facel;
 +    real               *nbfp_i;
 +    int                 n, ci, ci_sh;
 +    int                 ish, ishf;
 +    gmx_bool            do_LJ, half_LJ, do_coul;
 +    int                 cjind0, cjind1, cjind;
 +    int                 ip, jp;
 +
 +    real                xi[UNROLLI*XI_STRIDE];
 +    real                fi[UNROLLI*FI_STRIDE];
 +    real                qi[UNROLLI];
 +
 +#ifdef CALC_ENERGIES
 +#ifndef ENERGY_GROUPS
 +
 +    real       Vvdw_ci, Vc_ci;
 +#else
 +    int        egp_mask;
 +    int        egp_sh_i[UNROLLI];
 +#endif
 +    real       sh_invrc6;
 +#endif
 +
 +#ifdef CALC_COUL_RF
 +    real       k_rf2;
 +#ifdef CALC_ENERGIES
 +    real       k_rf, c_rf;
 +#endif
 +#endif
 +#ifdef CALC_COUL_TAB
 +    real       tabscale;
 +#ifdef CALC_ENERGIES
 +    real       halfsp;
 +#endif
 +#ifndef GMX_DOUBLE
 +    const real *tab_coul_FDV0;
 +#else
 +    const real *tab_coul_F;
 +    const real *tab_coul_V;
 +#endif
 +#endif
 +
 +    int ninner;
 +
 +#ifdef COUNT_PAIRS
 +    int npair = 0;
 +#endif
 +
 +#ifdef CALC_ENERGIES
 +    sh_invrc6 = ic->sh_invrc6;
 +#endif
 +
 +#ifdef CALC_COUL_RF
 +    k_rf2 = 2*ic->k_rf;
 +#ifdef CALC_ENERGIES
 +    k_rf = ic->k_rf;
 +    c_rf = ic->c_rf;
 +#endif
 +#endif
 +#ifdef CALC_COUL_TAB
 +    tabscale = ic->tabq_scale;
 +#ifdef CALC_ENERGIES
 +    halfsp = 0.5/ic->tabq_scale;
 +#endif
 +
 +#ifndef GMX_DOUBLE
 +    tab_coul_FDV0 = ic->tabq_coul_FDV0;
 +#else
 +    tab_coul_F    = ic->tabq_coul_F;
 +    tab_coul_V    = ic->tabq_coul_V;
 +#endif
 +#endif
 +
 +#ifdef ENERGY_GROUPS
 +    egp_mask = (1<<nbat->neg_2log) - 1;
 +#endif
 +
 +
 +    rcut2               = ic->rcoulomb*ic->rcoulomb;
 +#ifdef VDW_CUTOFF_CHECK
 +    rvdw2               = ic->rvdw*ic->rvdw;
 +#endif
 +
 +    ntype2              = nbat->ntype*2;
 +    nbfp                = nbat->nbfp;
 +    q                   = nbat->q;
 +    type                = nbat->type;
 +    facel               = ic->epsfac;
 +    shiftvec            = shift_vec[0];
 +    x                   = nbat->x;
 +
 +    l_cj = nbl->cj;
 +
 +    /* NOTE(review): ninner is only written in the code visible here;
 +     * whether the included inner header reads it cannot be told from here.
 +     */
 +    ninner = 0;
 +    for (n = 0; n < nbl->nci; n++)
 +    {
 +        int i, d;
 +
 +        nbln = &nbl->ci[n];
 +
 +        ish              = (nbln->shift & NBNXN_CI_SHIFT);
 +        /* x, f and fshift are assumed to be stored with stride 3 */
 +        ishf             = ish*DIM;
 +        cjind0           = nbln->cj_ind_start;
 +        cjind1           = nbln->cj_ind_end;
 +        /* Currently only works with super-cells equal to sub-cells */
 +        ci               = nbln->ci;
 +        /* ci_sh equals ci only for the central shift, otherwise it is -1 */
 +        ci_sh            = (ish == CENTRAL ? ci : -1);
 +
 +        /* We have 5 LJ/C combinations, but use only three inner loops,
 +         * as the other combinations are unlikely and/or not much faster:
 +         * inner half-LJ + C for half-LJ + C / no-LJ + C
 +         * inner LJ + C      for full-LJ + C
 +         * inner LJ          for full-LJ + no-C / half-LJ + no-C
 +         */
 +        do_LJ   = (nbln->shift & NBNXN_CI_DO_LJ(0));
 +        do_coul = (nbln->shift & NBNXN_CI_DO_COUL(0));
 +        half_LJ = ((nbln->shift & NBNXN_CI_HALF_LJ(0)) || !do_LJ) && do_coul;
 +
 +#ifdef CALC_ENERGIES
 +#ifndef ENERGY_GROUPS
 +        Vvdw_ci = 0;
 +        Vc_ci   = 0;
 +#else
 +        for (i = 0; i < UNROLLI; i++)
 +        {
 +            egp_sh_i[i] = ((nbat->energrp[ci]>>(i*nbat->neg_2log)) & egp_mask)*nbat->nenergrp;
 +        }
 +#endif
 +#endif
 +
 +        /* Load the shifted i-coordinates and clear the local i-forces */
 +        for (i = 0; i < UNROLLI; i++)
 +        {
 +            for (d = 0; d < DIM; d++)
 +            {
 +                xi[i*XI_STRIDE+d] = x[(ci*UNROLLI+i)*X_STRIDE+d] + shiftvec[ishf+d];
 +                fi[i*FI_STRIDE+d] = 0;
 +            }
 +        }
 +
 +        if (do_coul)
 +        {
 +#ifdef CALC_ENERGIES
 +            real Vc_sub_self;
 +
 +#ifdef CALC_COUL_RF
 +            Vc_sub_self = 0.5*c_rf;
 +#endif
 +#ifdef CALC_COUL_TAB
 +#ifdef GMX_DOUBLE
 +            Vc_sub_self = 0.5*tab_coul_V[0];
 +#else
 +            Vc_sub_self = 0.5*tab_coul_FDV0[2];
 +#endif
 +#endif
 +#endif
 +
 +            for (i = 0; i < UNROLLI; i++)
 +            {
 +                qi[i] = facel*q[ci*UNROLLI+i];
 +
 +#ifdef CALC_ENERGIES
 +                /* When the first j-cluster of this entry is ci itself
 +                 * (central shift only, see ci_sh), subtract
 +                 * qi*q*Vc_sub_self for every i-atom from Vc.
 +                 */
 +                if (l_cj[nbln->cj_ind_start].cj == ci_sh)
 +                {
 +#ifdef ENERGY_GROUPS
 +                    Vc[egp_sh_i[i]+((nbat->energrp[ci]>>(i*nbat->neg_2log)) & egp_mask)]
 +#else
 +                    Vc[0]
 +#endif
 +                        -= qi[i]*q[ci*UNROLLI+i]*Vc_sub_self;
 +                }
 +#endif
 +            }
 +        }
 +
 +        /* First pass: leading j-clusters whose excl mask is not 0xffff are
 +         * handled by the inner code compiled with CHECK_EXCLS defined.
 +         */
 +        cjind = cjind0;
 +        while (cjind < cjind1 && nbl->cj[cjind].excl != 0xffff)
 +        {
 +#define CHECK_EXCLS
 +            if (half_LJ)
 +            {
 +#define CALC_COULOMB
 +#define HALF_LJ
 +#include "nbnxn_kernel_ref_inner.h"
 +#undef HALF_LJ
 +#undef CALC_COULOMB
 +            }
 +            /* cppcheck-suppress duplicateBranch */
 +            else if (do_coul)
 +            {
 +#define CALC_COULOMB
 +#include "nbnxn_kernel_ref_inner.h"
 +#undef CALC_COULOMB
 +            }
 +            else
 +            {
 +#include "nbnxn_kernel_ref_inner.h"
 +            }
 +#undef CHECK_EXCLS
 +            cjind++;
 +        }
 +
 +        /* Second pass: the remaining j-clusters use the same three-way
 +         * dispatch, with the inner code compiled without CHECK_EXCLS.
 +         */
 +        for (; (cjind < cjind1); cjind++)
 +        {
 +            if (half_LJ)
 +            {
 +#define CALC_COULOMB
 +#define HALF_LJ
 +#include "nbnxn_kernel_ref_inner.h"
 +#undef HALF_LJ
 +#undef CALC_COULOMB
 +            }
 +            /* cppcheck-suppress duplicateBranch */
 +            else if (do_coul)
 +            {
 +#define CALC_COULOMB
 +#include "nbnxn_kernel_ref_inner.h"
 +#undef CALC_COULOMB
 +            }
 +            else
 +            {
 +#include "nbnxn_kernel_ref_inner.h"
 +            }
 +        }
 +        ninner += cjind1 - cjind0;
 +
 +        /* Add accumulated i-forces to the force array */
 +        for (i = 0; i < UNROLLI; i++)
 +        {
 +            for (d = 0; d < DIM; d++)
 +            {
 +                f[(ci*UNROLLI+i)*F_STRIDE+d] += fi[i*FI_STRIDE+d];
 +            }
 +        }
 +#ifdef CALC_SHIFTFORCES
 +        if (fshift != NULL)
 +        {
 +            /* Add i forces to shifted force list */
 +            for (i = 0; i < UNROLLI; i++)
 +            {
 +                for (d = 0; d < DIM; d++)
 +                {
 +                    fshift[ishf+d] += fi[i*FI_STRIDE+d];
 +                }
 +            }
 +        }
 +#endif
 +
 +#ifdef CALC_ENERGIES
 +#ifndef ENERGY_GROUPS
 +        /* Vvdw_ci and Vc_ci were zeroed at the top of this iteration;
 +         * presumably the included inner code accumulates into them.
 +         */
 +        *Vvdw += Vvdw_ci;
 +        *Vc   += Vc_ci;
 +#endif
 +#endif
 +    }
 +
 +#ifdef COUNT_PAIRS
 +    printf("atom pairs %d\n", npair);
 +#endif
 +}
 +
 +#undef CALC_SHIFTFORCES
 +
 +#undef X_STRIDE
 +#undef F_STRIDE
 +#undef XI_STRIDE
 +#undef FI_STRIDE
 +
 +#undef UNROLLI
 +#undef UNROLLJ
index ef8cd2d08a524fac028baaddbbd47d61261187d0,0000000000000000000000000000000000000000..7829fcdb95fbacac51fa409f868bb00a265dfa8f
mode 100644,000000..100644
--- /dev/null
@@@ -1,5125 -1,0 +1,5127 @@@
-                        grid->c0[YY]+cy*grid->sy, grid->inv_sy,
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + *          GROningen MAchine for Chemical Simulations
 + *
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2012, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + *
 + * For more info, check our website at http://www.gromacs.org
 + */
 +
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <math.h>
 +#include <string.h>
 +#include "sysstuff.h"
 +#include "smalloc.h"
 +#include "macros.h"
 +#include "maths.h"
 +#include "vec.h"
 +#include "pbc.h"
 +#include "nbnxn_consts.h"
 +#include "nbnxn_internal.h"
 +#include "nbnxn_atomdata.h"
 +#include "nbnxn_search.h"
 +#include "gmx_cyclecounter.h"
 +#include "gmxfio.h"
 +#include "gmx_omp_nthreads.h"
 +#include "nrnb.h"
 +
 +
 +/* Pair search box lower and upper corner in x,y,z.
 + * Store this in 4 instead of 3 reals, which is useful with SSE.
 + * To avoid complicating the code we also use 4 without SSE.
 + */
 +#define NNBSBB_C         4
 +#define NNBSBB_B         (2*NNBSBB_C)
 +/* Pair search box lower and upper bound in z only. */
 +#define NNBSBB_D         2
 +/* Pair search box lower and upper corner x,y,z indices */
 +#define BBL_X  0
 +#define BBL_Y  1
 +#define BBL_Z  2
 +#define BBU_X  4
 +#define BBU_Y  5
 +#define BBU_Z  6
 +
 +
 +#ifdef NBNXN_SEARCH_BB_SSE
 +/* We use SSE or AVX-128bit for bounding box calculations */
 +
 +#ifndef GMX_DOUBLE
 +/* Single precision BBs + coordinates, we can also load coordinates using SSE */
 +#define NBNXN_SEARCH_SSE_SINGLE
 +#endif
 +
 +/* Include basic SSE2 stuff */
 +#include <emmintrin.h>
 +
 +#if defined NBNXN_SEARCH_SSE_SINGLE && (GPU_NSUBCELL == 4 || GPU_NSUBCELL == 8)
 +/* Store bounding boxes with x, y and z coordinates in packs of 4 */
 +#define NBNXN_PBB_SSE
 +#endif
 +
 +/* The width of SSE/AVX128 with single precision for bounding boxes with GPU.
 + * Here AVX-256 turns out to be slightly slower than AVX-128.
 + */
 +#define STRIDE_PBB        4
 +#define STRIDE_PBB_2LOG   2
 +
 +#endif /* NBNXN_SEARCH_BB_SSE */
 +
 +#ifdef GMX_NBNXN_SIMD
 +
 +/* The functions below are macros as they are performance sensitive */
 +
 +/* 4x4 list, pack=4: no complex conversion required */
 +/* i-cluster to j-cluster conversion */
 +#define CI_TO_CJ_J4(ci)   (ci)
 +/* cluster index to coordinate array index conversion */
 +#define X_IND_CI_J4(ci)  ((ci)*STRIDE_P4)
 +#define X_IND_CJ_J4(cj)  ((cj)*STRIDE_P4)
 +
 +/* 4x2 list, pack=4: j-cluster size is half the packing width */
 +/* i-cluster to j-cluster conversion */
 +#define CI_TO_CJ_J2(ci)  ((ci)<<1)
 +/* cluster index to coordinate array index conversion */
 +#define X_IND_CI_J2(ci)  ((ci)*STRIDE_P4)
 +#define X_IND_CJ_J2(cj)  (((cj)>>1)*STRIDE_P4 + ((cj) & 1)*(PACK_X4>>1))
 +
 +/* 4x8 list, pack=8: i-cluster size is half the packing width */
 +/* i-cluster to j-cluster conversion */
 +#define CI_TO_CJ_J8(ci)  ((ci)>>1)
 +/* cluster index to coordinate array index conversion */
 +#define X_IND_CI_J8(ci)  (((ci)>>1)*STRIDE_P8 + ((ci) & 1)*(PACK_X8>>1))
 +#define X_IND_CJ_J8(cj)  ((cj)*STRIDE_P8)
 +
 +/* The j-cluster size is matched to the SIMD width */
 +#if GMX_NBNXN_SIMD_BITWIDTH == 128
 +#ifdef GMX_DOUBLE
 +#define CI_TO_CJ_SIMD_4XN(ci)  CI_TO_CJ_J2(ci)
 +#define X_IND_CI_SIMD_4XN(ci)  X_IND_CI_J2(ci)
 +#define X_IND_CJ_SIMD_4XN(cj)  X_IND_CJ_J2(cj)
 +#else
 +#define CI_TO_CJ_SIMD_4XN(ci)  CI_TO_CJ_J4(ci)
 +#define X_IND_CI_SIMD_4XN(ci)  X_IND_CI_J4(ci)
 +#define X_IND_CJ_SIMD_4XN(cj)  X_IND_CJ_J4(cj)
 +#endif
 +#else
 +#if GMX_NBNXN_SIMD_BITWIDTH == 256
 +#ifdef GMX_DOUBLE
 +#define CI_TO_CJ_SIMD_4XN(ci)  CI_TO_CJ_J4(ci)
 +#define X_IND_CI_SIMD_4XN(ci)  X_IND_CI_J4(ci)
 +#define X_IND_CJ_SIMD_4XN(cj)  X_IND_CJ_J4(cj)
 +#else
 +#define CI_TO_CJ_SIMD_4XN(ci)  CI_TO_CJ_J8(ci)
 +#define X_IND_CI_SIMD_4XN(ci)  X_IND_CI_J8(ci)
 +#define X_IND_CJ_SIMD_4XN(cj)  X_IND_CJ_J8(cj)
 +/* Half SIMD with j-cluster size */
 +#define CI_TO_CJ_SIMD_2XNN(ci) CI_TO_CJ_J4(ci)
 +#define X_IND_CI_SIMD_2XNN(ci) X_IND_CI_J4(ci)
 +#define X_IND_CJ_SIMD_2XNN(cj) X_IND_CJ_J4(cj)
 +#endif
 +#else
 +#error "unsupported GMX_NBNXN_SIMD_WIDTH"
 +#endif
 +#endif
 +
 +#endif /* GMX_NBNXN_SIMD */
 +
 +
 +/* Interaction masks for 4xN atom interactions.
 + * Bit i*CJ_SIZE + j tells if atom i and j interact.
 + */
 +/* The all-interactions mask is the same for all kernels */
 +#define NBNXN_INT_MASK_ALL        0xffffffff
 +/* 4x4 kernel diagonal mask */
 +#define NBNXN_INT_MASK_DIAG       0x08ce
 +/* 4x2 kernel diagonal masks */
 +#define NBNXN_INT_MASK_DIAG_J2_0  0x0002
 +#define NBNXN_INT_MASK_DIAG_J2_1  0x002F
 +/* 4x8 kernel diagonal masks */
 +#define NBNXN_INT_MASK_DIAG_J8_0  0xf0f8fcfe
 +#define NBNXN_INT_MASK_DIAG_J8_1  0x0080c0e0
 +
 +
 +#ifdef NBNXN_SEARCH_BB_SSE
 +/* Store bounding boxes corners as quadruplets: xxxxyyyyzzzz */
 +#define NBNXN_BBXXXX
 +/* Size of bounding box corners quadruplet */
 +#define NNBSBB_XXXX      (NNBSBB_D*DIM*STRIDE_PBB)
 +#endif
 +
 +/* We shift the i-particles backward for PBC.
 + * This leads to more conditionals than shifting forward.
 + * We do this to get more balanced pair lists.
 + */
 +#define NBNXN_SHIFT_BACKWARD
 +
 +
 +/* This define is a lazy way to avoid interdependence of the grid
 + * and searching data structures.
 + */
 +#define NBNXN_NA_SC_MAX (GPU_NSUBCELL*NBNXN_GPU_CLUSTER_SIZE)
 +
 +
 +/* Reset the count and c fields of the first enbsCCnr entries of cc to 0 */
 +static void nbs_cycle_clear(nbnxn_cycle_t *cc)
 +{
 +    int i;
 +
 +    for (i = 0; i < enbsCCnr; i++)
 +    {
 +        cc[i].count = 0;
 +        cc[i].c     = 0;
 +    }
 +}
 +
 +/* Return cc->c scaled by 1e-6 and divided by cc->count, i.e. the average
 + * per counted event in units of 1e6 (presumably mega-cycles).
 + * NOTE(review): cc->count == 0 is not guarded against here.
 + */
 +static double Mcyc_av(const nbnxn_cycle_t *cc)
 +{
 +    return (double)cc->c*1e-6/cc->count;
 +}
 +
 +/* Print one line of cycle statistics to fp: the grid count followed by the
 + * Mcyc_av averages for the grid, search and reducef counters.
 + * With nbs->nthread_max > 1 the combine average is appended when its count
 + * is positive, followed by the search average of each thread's work data.
 + * NOTE(review): the local n is not used in this function.
 + */
 +static void nbs_cycle_print(FILE *fp, const nbnxn_search_t nbs)
 +{
 +    int n;
 +    int t;
 +
 +    fprintf(fp, "\n");
 +    fprintf(fp, "ns %4d grid %4.1f search %4.1f red.f %5.3f",
 +            nbs->cc[enbsCCgrid].count,
 +            Mcyc_av(&nbs->cc[enbsCCgrid]),
 +            Mcyc_av(&nbs->cc[enbsCCsearch]),
 +            Mcyc_av(&nbs->cc[enbsCCreducef]));
 +
 +    if (nbs->nthread_max > 1)
 +    {
 +        if (nbs->cc[enbsCCcombine].count > 0)
 +        {
 +            fprintf(fp, " comb %5.2f",
 +                    Mcyc_av(&nbs->cc[enbsCCcombine]));
 +        }
 +        fprintf(fp, " s. th");
 +        for (t = 0; t < nbs->nthread_max; t++)
 +        {
 +            fprintf(fp, " %4.1f",
 +                    Mcyc_av(&nbs->work[t].cc[enbsCCsearch]));
 +        }
 +    }
 +    fprintf(fp, "\n");
 +}
 +
 +/* Put a grid in its empty state: the cxy_na, cxy_ind, bb and bbj pointers
 + * are set to NULL and the cxy_nalloc and nc_nalloc sizes to 0.
 + * Other grid fields are not touched here.
 + */
 +static void nbnxn_grid_init(nbnxn_grid_t * grid)
 +{
 +    grid->cxy_na      = NULL;
 +    grid->cxy_ind     = NULL;
 +    grid->cxy_nalloc  = 0;
 +    grid->bb          = NULL;
 +    grid->bbj         = NULL;
 +    grid->nc_nalloc   = 0;
 +}
 +
 +/* Return the exponent log2 for which (1<<log2) == n.
 + * Calls gmx_fatal when n is not a power of 2; this includes n <= 0, as the
 + * search loop then stops at log2 = 0 and 1 != n.
 + */
 +static int get_2log(int n)
 +{
 +    int log2;
 +
 +    log2 = 0;
 +    /* Find the smallest log2 with (1<<log2) >= n */
 +    while ((1<<log2) < n)
 +    {
 +        log2++;
 +    }
 +    if ((1<<log2) != n)
 +    {
 +        gmx_fatal(FARGS, "nbnxn na_c (%d) is not a power of 2", n);
 +    }
 +
 +    return log2;
 +}
 +
 +/* Return the i-cluster size for a kernel type:
 + * NBNXN_CPU_CLUSTER_I_SIZE for the 4x4 plain-C and both 4xN SIMD types,
 + * NBNXN_GPU_CLUSTER_SIZE for the 8x8x8 CUDA and plain-C types.
 + * Any other value is reported through gmx_incons; the trailing return 0
 + * is only reached if that call returns.
 + */
 +static int nbnxn_kernel_to_ci_size(int nb_kernel_type)
 +{
 +    switch (nb_kernel_type)
 +    {
 +        case nbnxnk4x4_PlainC:
 +        case nbnxnk4xN_SIMD_4xN:
 +        case nbnxnk4xN_SIMD_2xNN:
 +            return NBNXN_CPU_CLUSTER_I_SIZE;
 +        case nbnxnk8x8x8_CUDA:
 +        case nbnxnk8x8x8_PlainC:
 +            /* The cluster size for super/sub lists is only set here.
 +             * Any value should work for the pair-search and atomdata code.
 +             * The kernels, of course, might require a particular value.
 +             */
 +            return NBNXN_GPU_CLUSTER_SIZE;
 +        default:
 +            gmx_incons("unknown kernel type");
 +    }
 +
 +    return 0;
 +}
 +
 +/* Return the j-cluster size for a kernel type.
 + * The SIMD width in reals is GMX_NBNXN_SIMD_BITWIDTH/(sizeof(real)*8) when
 + * GMX_NBNXN_SIMD is defined and stays 0 otherwise, so without SIMD support
 + * the two SIMD kernel types yield 0.
 + *   4x4 plain-C      : NBNXN_CPU_CLUSTER_I_SIZE
 + *   4xN SIMD 4xN     : the SIMD width
 + *   4xN SIMD 2xNN    : half the SIMD width
 + *   8x8x8 CUDA/plain : same as nbnxn_kernel_to_ci_size
 + * Any other value is reported through gmx_incons.
 + */
 +int nbnxn_kernel_to_cj_size(int nb_kernel_type)
 +{
 +    int nbnxn_simd_width = 0;
 +    int cj_size          = 0;
 +
 +#ifdef GMX_NBNXN_SIMD
 +    nbnxn_simd_width = GMX_NBNXN_SIMD_BITWIDTH/(sizeof(real)*8);
 +#endif
 +
 +    switch (nb_kernel_type)
 +    {
 +        case nbnxnk4x4_PlainC:
 +            cj_size = NBNXN_CPU_CLUSTER_I_SIZE;
 +            break;
 +        case nbnxnk4xN_SIMD_4xN:
 +            cj_size = nbnxn_simd_width;
 +            break;
 +        case nbnxnk4xN_SIMD_2xNN:
 +            cj_size = nbnxn_simd_width/2;
 +            break;
 +        case nbnxnk8x8x8_CUDA:
 +        case nbnxnk8x8x8_PlainC:
 +            cj_size = nbnxn_kernel_to_ci_size(nb_kernel_type);
 +            break;
 +        default:
 +            gmx_incons("unknown kernel type");
 +    }
 +
 +    return cj_size;
 +}
 +
 +/* Convert i-cluster index ci to a j-cluster index, given the 2-log of the
 + * j-cluster size:
 + *   na_cj_2log == 2 : ci      (unchanged)
 + *   na_cj_2log == 1 : ci<<1
 + *   na_cj_2log == 3 : ci>>1
 + * Any other na_cj_2log returns 0. The break statements after the returns
 + * are never reached.
 + */
 +static int ci_to_cj(int na_cj_2log, int ci)
 +{
 +    switch (na_cj_2log)
 +    {
 +        case 2: return ci;     break;
 +        case 1: return (ci<<1); break;
 +        case 3: return (ci>>1); break;
 +    }
 +
 +    return 0;
 +}
 +
 +/* Tell whether a kernel type uses the simple pair list:
 + * FALSE for the 8x8x8 CUDA and plain-C types, TRUE for the 4x4 plain-C
 + * and both 4xN SIMD types.
 + * nbnxnkNotSet is reported through gmx_fatal, any other unknown value
 + * through gmx_incons (followed by return FALSE).
 + */
 +gmx_bool nbnxn_kernel_pairlist_simple(int nb_kernel_type)
 +{
 +    if (nb_kernel_type == nbnxnkNotSet)
 +    {
 +        gmx_fatal(FARGS, "Non-bonded kernel type not set for Verlet-style pair-list.");
 +    }
 +
 +    switch (nb_kernel_type)
 +    {
 +        case nbnxnk8x8x8_CUDA:
 +        case nbnxnk8x8x8_PlainC:
 +            return FALSE;
 +
 +        case nbnxnk4x4_PlainC:
 +        case nbnxnk4xN_SIMD_4xN:
 +        case nbnxnk4xN_SIMD_2xNN:
 +            return TRUE;
 +
 +        default:
 +            gmx_incons("Invalid nonbonded kernel type passed!");
 +            return FALSE;
 +    }
 +}
 +
 +/* Allocate a search data structure and store it in *nbs_ptr.
 + *
 + * nbs_ptr      receives the newly allocated structure
 + * n_dd_cells   when not NULL, domain decomposition is flagged (DomDec) and
 + *              every dimension with more than one cell is marked in dd_dim
 + *              and doubles the number of grids
 + * zones        stored in nbs->zones, only when n_dd_cells is not NULL
 + * nthread_max  number of per-thread work structures to allocate
 + *
 + * All grids and per-thread work buffers start out empty. Cycle counters
 + * are cleared and print_cycles is set when the environment variable
 + * GMX_NBNXN_CYCLE is present.
 + */
 +void nbnxn_init_search(nbnxn_search_t    * nbs_ptr,
 +                       ivec               *n_dd_cells,
 +                       gmx_domdec_zones_t *zones,
 +                       int                 nthread_max)
 +{
 +    nbnxn_search_t nbs;
 +    int            d, g, t;
 +
 +    snew(nbs, 1);
 +    *nbs_ptr = nbs;
 +
 +    nbs->DomDec = (n_dd_cells != NULL);
 +
 +    clear_ivec(nbs->dd_dim);
 +    nbs->ngrid = 1;
 +    if (nbs->DomDec)
 +    {
 +        nbs->zones = zones;
 +
 +        for (d = 0; d < DIM; d++)
 +        {
 +            if ((*n_dd_cells)[d] > 1)
 +            {
 +                nbs->dd_dim[d] = 1;
 +                /* Each grid matches a DD zone */
 +                nbs->ngrid *= 2;
 +            }
 +        }
 +    }
 +
 +    snew(nbs->grid, nbs->ngrid);
 +    for (g = 0; g < nbs->ngrid; g++)
 +    {
 +        nbnxn_grid_init(&nbs->grid[g]);
 +    }
 +    nbs->cell        = NULL;
 +    nbs->cell_nalloc = 0;
 +    nbs->a           = NULL;
 +    nbs->a_nalloc    = 0;
 +
 +    nbs->nthread_max = nthread_max;
 +
 +    /* Initialize the work data structures for each thread */
 +    snew(nbs->work, nbs->nthread_max);
 +    for (t = 0; t < nbs->nthread_max; t++)
 +    {
 +        nbs->work[t].cxy_na           = NULL;
 +        nbs->work[t].cxy_na_nalloc    = 0;
 +        nbs->work[t].sort_work        = NULL;
 +        nbs->work[t].sort_work_nalloc = 0;
 +    }
 +
 +    /* Initialize detailed nbsearch cycle counting */
 +    nbs->print_cycles = (getenv("GMX_NBNXN_CYCLE") != 0);
 +    nbs->search_count = 0;
 +    nbs_cycle_clear(nbs->cc);
 +    for (t = 0; t < nbs->nthread_max; t++)
 +    {
 +        nbs_cycle_clear(nbs->work[t].cc);
 +    }
 +}
 +
 +/* Return n divided by the volume of the rectangular box spanned by
 + * corner0 and corner1 (product of the x, y and z extents).
 + * A zero extent in any dimension is not guarded against here.
 + */
 +static real grid_atom_density(int n, rvec corner0, rvec corner1)
 +{
 +    rvec size;
 +
 +    rvec_sub(corner1, corner0, size);
 +
 +    return n/(size[XX]*size[YY]*size[ZZ]);
 +}
 +
 +/* Set the x/y cell layout of a grid and make sure the arrays that depend
 + * on it are large enough.
 + *
 + * nbs           search data; the per-thread cxy_na work arrays are grown
 + * grid          grid to set up: ncx, ncy, sx, sy, inv_sx, inv_sy, c0 and c1
 + *               are set and the cell arrays are (re)allocated when needed
 + * dd_zone       for values > 0 one extra row of cells is added in x and y
 + * n             number of atoms to put on the grid
 + * corner0/1     lower and upper corner of the gridded volume
 + * atom_density  used to choose the target cell length
 + * XFormat       not referenced in this function
 + *
 + * Returns nc_max, the worst-case cell count that was used for allocation.
 + * NOTE(review): nc_max is declared real although it only holds integer
 + * expressions and is returned as int; the local adens is unused.
 + */
 +static int set_grid_size_xy(const nbnxn_search_t nbs,
 +                            nbnxn_grid_t *grid,
 +                            int dd_zone,
 +                            int n, rvec corner0, rvec corner1,
 +                            real atom_density,
 +                            int XFormat)
 +{
 +    rvec size;
 +    int  na_c;
 +    real adens, tlen, tlen_x, tlen_y, nc_max;
 +    int  t;
 +
 +    rvec_sub(corner1, corner0, size);
 +
 +    if (n > grid->na_sc)
 +    {
 +        /* target cell length */
 +        if (grid->bSimple)
 +        {
 +            /* To minimize the zero interactions, we should make
 +             * the largest of the i/j cell cubic.
 +             */
 +            na_c = max(grid->na_c, grid->na_cj);
 +
 +            /* Approximately cubic cells */
 +            tlen   = pow(na_c/atom_density, 1.0/3.0);
 +            tlen_x = tlen;
 +            tlen_y = tlen;
 +        }
 +        else
 +        {
 +            /* Approximately cubic sub cells */
 +            tlen   = pow(grid->na_c/atom_density, 1.0/3.0);
 +            tlen_x = tlen*GPU_NSUBCELL_X;
 +            tlen_y = tlen*GPU_NSUBCELL_Y;
 +        }
 +        /* We round ncx and ncy down, because we get less cell pairs
 +         * in the nblist when the fixed cell dimensions (x,y) are
 +         * larger than the variable one (z) than the other way around.
 +         */
 +        grid->ncx = max(1, (int)(size[XX]/tlen_x));
 +        grid->ncy = max(1, (int)(size[YY]/tlen_y));
 +    }
 +    else
 +    {
 +        grid->ncx = 1;
 +        grid->ncy = 1;
 +    }
 +
 +    /* The cell sizes are set before the extra non-home-zone row below
 +     * is added to ncx and ncy.
 +     */
 +    grid->sx     = size[XX]/grid->ncx;
 +    grid->sy     = size[YY]/grid->ncy;
 +    grid->inv_sx = 1/grid->sx;
 +    grid->inv_sy = 1/grid->sy;
 +
 +    if (dd_zone > 0)
 +    {
 +        /* This is a non-home zone, add an extra row of cells
 +         * for particles communicated for bonded interactions.
 +         * These can be beyond the cut-off. It doesn't matter where
 +         * they end up on the grid, but for performance it's better
 +         * if they don't end up in cells that can be within cut-off range.
 +         */
 +        grid->ncx++;
 +        grid->ncy++;
 +    }
 +
 +    /* We need one additional cell entry for particles moved by DD */
 +    if (grid->ncx*grid->ncy+1 > grid->cxy_nalloc)
 +    {
 +        grid->cxy_nalloc = over_alloc_large(grid->ncx*grid->ncy+1);
 +        srenew(grid->cxy_na, grid->cxy_nalloc);
 +        srenew(grid->cxy_ind, grid->cxy_nalloc+1);
 +    }
 +    for (t = 0; t < nbs->nthread_max; t++)
 +    {
 +        if (grid->ncx*grid->ncy+1 > nbs->work[t].cxy_na_nalloc)
 +        {
 +            nbs->work[t].cxy_na_nalloc = over_alloc_large(grid->ncx*grid->ncy+1);
 +            srenew(nbs->work[t].cxy_na, nbs->work[t].cxy_na_nalloc);
 +        }
 +    }
 +
 +    /* Worst case scenario of 1 atom in each last cell */
 +    if (grid->na_cj <= grid->na_c)
 +    {
 +        nc_max = n/grid->na_sc + grid->ncx*grid->ncy;
 +    }
 +    else
 +    {
 +        nc_max = n/grid->na_sc + grid->ncx*grid->ncy*grid->na_cj/grid->na_c;
 +    }
 +
 +    if (nc_max > grid->nc_nalloc)
 +    {
 +        int bb_nalloc;
 +
 +        grid->nc_nalloc = over_alloc_large(nc_max);
 +        srenew(grid->nsubc, grid->nc_nalloc);
 +        srenew(grid->bbcz, grid->nc_nalloc*NNBSBB_D);
 +#ifdef NBNXN_PBB_SSE
 +        bb_nalloc = grid->nc_nalloc*GPU_NSUBCELL/STRIDE_PBB*NNBSBB_XXXX;
 +#else
 +        bb_nalloc = grid->nc_nalloc*GPU_NSUBCELL*NNBSBB_B;
 +#endif
 +        /* bb is freed and allocated anew, so old contents are not kept */
 +        sfree_aligned(grid->bb);
 +        /* This snew also zeros the contents, this avoid possible
 +         * floating exceptions in SSE with the unused bb elements.
 +         */
 +        snew_aligned(grid->bb, bb_nalloc, 16);
 +
 +        if (grid->bSimple)
 +        {
 +            if (grid->na_cj == grid->na_c)
 +            {
 +                /* Equal i- and j-cluster sizes: bbj aliases bb */
 +                grid->bbj = grid->bb;
 +            }
 +            else
 +            {
 +                sfree_aligned(grid->bbj);
 +                snew_aligned(grid->bbj, bb_nalloc*grid->na_c/grid->na_cj, 16);
 +            }
 +        }
 +
 +        srenew(grid->flags, grid->nc_nalloc);
 +    }
 +
 +    copy_rvec(corner0, grid->c0);
 +    copy_rvec(corner1, grid->c1);
 +
 +    return nc_max;
 +}
 +
 +/* We need to sort particles in grid columns on z-coordinate.
 + * As particles are very often distributed homogeneously, we use a sorting
 + * algorithm similar to pigeonhole sort. We multiply the z-coordinate
 + * by a factor, cast to an int and try to store in that hole. If the hole
 + * is full, we move this or another particle. A second pass is needed to make
 + * contiguous elements. SORT_GRID_OVERSIZE is the ratio of holes to particles.
 + * 4 is the optimal value for homogeneous particle distribution and allows
 + * for an O(#particles) sort up till distributions where all particles are
 + * concentrated in 1/4 of the space. No NlogN fallback is implemented,
 + * as it can be expensive to detect inhomogeneous particle distributions.
 + * SGSF is the maximum ratio of holes used, in the worst case all particles
 + * end up in the last hole and we need #particles extra holes at the end.
 + */
 +#define SORT_GRID_OVERSIZE 4
 +#define SGSF (SORT_GRID_OVERSIZE + 1)
 +
 +/* Sort particle index a on coordinates x along dim.
 + * Backwards tells if we want decreasing instead of increasing coordinates.
 + * h0 is the minimum of the coordinate range.
 + * invh is the inverse hole spacing.
 + * nsort, the theoretical hole limit, is the upper bound of the forward
 + * second pass and is range-checked under DEBUG_NBNXN_GRIDDING.
 + * sort is the sorting work array; slots holding a negative value are
 + * treated as empty and every slot collected in the second pass is reset
 + * to -1.
 + * With n <= 1 the function returns without touching a or sort.
 + * NOTE(review): the forward pass scans 0..nsort-1 while the backward pass
 + * only scans zi_min..zi_max; entries shifted to a slot >= nsort would be
 + * missed by the forward pass and end in the "Lost particles" check -
 + * confirm that callers size nsort and sort accordingly.
 + */
 +static void sort_atoms(int dim, gmx_bool Backwards,
 +                       int *a, int n, rvec *x,
 +                       real h0, real invh, int nsort, int *sort)
 +{
 +    int i, c;
 +    int zi, zim, zi_min, zi_max;
 +    int cp, tmp;
 +
 +    if (n <= 1)
 +    {
 +        /* Nothing to do */
 +        return;
 +    }
 +
 +    /* Determine the index range used, so we can limit it for the second pass */
 +    zi_min = INT_MAX;
 +    zi_max = -1;
 +
 +    /* Sort the particles using a simple index sort */
 +    for (i = 0; i < n; i++)
 +    {
 +        /* The cast takes care of float-point rounding effects below zero.
 +         * This code assumes particles are less than 1/SORT_GRID_OVERSIZE
 +         * times the box height out of the box.
 +         */
 +        zi = (int)((x[a[i]][dim] - h0)*invh);
 +
 +#ifdef DEBUG_NBNXN_GRIDDING
 +        if (zi < 0 || zi >= nsort)
 +        {
 +            gmx_fatal(FARGS, "(int)((x[%d][%c]=%f - %f)*%f) = %d, not in 0 - %d\n",
 +                      a[i], 'x'+dim, x[a[i]][dim], h0, invh, zi, nsort);
 +        }
 +#endif
 +
 +        /* Ideally this particle should go in sort cell zi,
 +         * but that might already be in use,
 +         * in that case find the first empty cell higher up
 +         */
 +        if (sort[zi] < 0)
 +        {
 +            sort[zi] = a[i];
 +            zi_min   = min(zi_min, zi);
 +            zi_max   = max(zi_max, zi);
 +        }
 +        else
 +        {
 +            /* We have multiple atoms in the same sorting slot.
 +             * Sort on real z for minimal bounding box size.
 +             * There is an extra check for identical z to ensure
 +             * well-defined output order, independent of input order
 +             * to ensure binary reproducibility after restarts.
 +             */
 +            while (sort[zi] >= 0 && ( x[a[i]][dim] >  x[sort[zi]][dim] ||
 +                                      (x[a[i]][dim] == x[sort[zi]][dim] &&
 +                                       a[i] > sort[zi])))
 +            {
 +                zi++;
 +            }
 +
 +            if (sort[zi] >= 0)
 +            {
 +                /* Shift all elements by one slot until we find an empty slot */
 +                cp  = sort[zi];
 +                zim = zi + 1;
 +                while (sort[zim] >= 0)
 +                {
 +                    tmp       = sort[zim];
 +                    sort[zim] = cp;
 +                    cp        = tmp;
 +                    zim++;
 +                }
 +                sort[zim] = cp;
 +                zi_max    = max(zi_max, zim);
 +            }
 +            /* zi only moved upward from an occupied slot, so zi_min
 +             * does not need updating in this branch.
 +             */
 +            sort[zi] = a[i];
 +            zi_max   = max(zi_max, zi);
 +        }
 +    }
 +
 +    /* Second pass: collect the filled slots into a and mark them empty */
 +    c = 0;
 +    if (!Backwards)
 +    {
 +        for (zi = 0; zi < nsort; zi++)
 +        {
 +            if (sort[zi] >= 0)
 +            {
 +                a[c++]   = sort[zi];
 +                sort[zi] = -1;
 +            }
 +        }
 +    }
 +    else
 +    {
 +        for (zi = zi_max; zi >= zi_min; zi--)
 +        {
 +            if (sort[zi] >= 0)
 +            {
 +                a[c++]   = sort[zi];
 +                sort[zi] = -1;
 +            }
 +        }
 +    }
 +    if (c < n)
 +    {
 +        gmx_incons("Lost particles while sorting");
 +    }
 +}
 +
 +#ifdef GMX_DOUBLE /* conversions to float get a relative margin of GMX_FLOAT_EPS */
 +#define R2F_D(x) ((float)((x) >= 0 ? ((1-GMX_FLOAT_EPS)*(x)) : ((1+GMX_FLOAT_EPS)*(x)))) /* scales x downward (towards -infinity) before the cast */
 +#define R2F_U(x) ((float)((x) >= 0 ? ((1+GMX_FLOAT_EPS)*(x)) : ((1-GMX_FLOAT_EPS)*(x)))) /* scales x upward (towards +infinity) before the cast */
 +#else /* GMX_DOUBLE not defined: both macros pass the value through unchanged */
 +#define R2F_D(x) (x)
 +#define R2F_U(x) (x)
 +#endif
 +
 +/* Coordinate order x,y,z, bb order xyz0 */
 +static void calc_bounding_box(int na, int stride, const real *x, float *bb)
 +{
 +    int  i, j;
 +    real xl, xh, yl, yh, zl, zh;
 +
 +    i  = 0;
 +    xl = x[i+XX];
 +    xh = x[i+XX];
 +    yl = x[i+YY];
 +    yh = x[i+YY];
 +    zl = x[i+ZZ];
 +    zh = x[i+ZZ];
 +    i += stride;
 +    for (j = 1; j < na; j++)
 +    {
 +        xl = min(xl, x[i+XX]);
 +        xh = max(xh, x[i+XX]);
 +        yl = min(yl, x[i+YY]);
 +        yh = max(yh, x[i+YY]);
 +        zl = min(zl, x[i+ZZ]);
 +        zh = max(zh, x[i+ZZ]);
 +        i += stride;
 +    }
 +    /* Note: possible double to float conversion here */
 +    bb[BBL_X] = R2F_D(xl);
 +    bb[BBL_Y] = R2F_D(yl);
 +    bb[BBL_Z] = R2F_D(zl);
 +    bb[BBU_X] = R2F_U(xh);
 +    bb[BBU_Y] = R2F_U(yh);
 +    bb[BBU_Z] = R2F_U(zh);
 +}
 +
 +/* Packed coordinates, bb order xyz0 */
 +static void calc_bounding_box_x_x4(int na, const real *x, float *bb)
 +{
 +    int  j;
 +    real xl, xh, yl, yh, zl, zh;
 +
 +    xl = x[XX*PACK_X4];
 +    xh = x[XX*PACK_X4];
 +    yl = x[YY*PACK_X4];
 +    yh = x[YY*PACK_X4];
 +    zl = x[ZZ*PACK_X4];
 +    zh = x[ZZ*PACK_X4];
 +    for (j = 1; j < na; j++)
 +    {
 +        xl = min(xl, x[j+XX*PACK_X4]);
 +        xh = max(xh, x[j+XX*PACK_X4]);
 +        yl = min(yl, x[j+YY*PACK_X4]);
 +        yh = max(yh, x[j+YY*PACK_X4]);
 +        zl = min(zl, x[j+ZZ*PACK_X4]);
 +        zh = max(zh, x[j+ZZ*PACK_X4]);
 +    }
 +    /* Note: possible double to float conversion here */
 +    bb[BBL_X] = R2F_D(xl);
 +    bb[BBL_Y] = R2F_D(yl);
 +    bb[BBL_Z] = R2F_D(zl);
 +    bb[BBU_X] = R2F_U(xh);
 +    bb[BBU_Y] = R2F_U(yh);
 +    bb[BBU_Z] = R2F_U(zh);
 +}
 +
 +/* Packed coordinates, bb order xyz0 */
 +static void calc_bounding_box_x_x8(int na, const real *x, float *bb)
 +{
 +    int  j;
 +    real xl, xh, yl, yh, zl, zh;
 +
 +    xl = x[XX*PACK_X8];
 +    xh = x[XX*PACK_X8];
 +    yl = x[YY*PACK_X8];
 +    yh = x[YY*PACK_X8];
 +    zl = x[ZZ*PACK_X8];
 +    zh = x[ZZ*PACK_X8];
 +    for (j = 1; j < na; j++)
 +    {
 +        xl = min(xl, x[j+XX*PACK_X8]);
 +        xh = max(xh, x[j+XX*PACK_X8]);
 +        yl = min(yl, x[j+YY*PACK_X8]);
 +        yh = max(yh, x[j+YY*PACK_X8]);
 +        zl = min(zl, x[j+ZZ*PACK_X8]);
 +        zh = max(zh, x[j+ZZ*PACK_X8]);
 +    }
 +    /* Note: possible double to float conversion here */
 +    bb[BBL_X] = R2F_D(xl);
 +    bb[BBL_Y] = R2F_D(yl);
 +    bb[BBL_Z] = R2F_D(zl);
 +    bb[BBU_X] = R2F_U(xh);
 +    bb[BBU_Y] = R2F_U(yh);
 +    bb[BBU_Z] = R2F_U(zh);
 +}
 +
 +#ifdef NBNXN_SEARCH_BB_SSE
 +
 +/* Packed coordinates, bb order xyz0 */
 +static void calc_bounding_box_x_x4_halves(int na, const real *x,
 +                                          float *bb, float *bbj)
 +{
 +    calc_bounding_box_x_x4(min(na, 2), x, bbj);
 +
 +    if (na > 2)
 +    {
 +        calc_bounding_box_x_x4(min(na-2, 2), x+(PACK_X4>>1), bbj+NNBSBB_B);
 +    }
 +    else
 +    {
 +        /* Set the "empty" bounding box to the same as the first one,
 +         * so we don't need to treat special cases in the rest of the code.
 +         */
 +        _mm_store_ps(bbj+NNBSBB_B, _mm_load_ps(bbj));
 +        _mm_store_ps(bbj+NNBSBB_B+NNBSBB_C, _mm_load_ps(bbj+NNBSBB_C));
 +    }
 +
 +    _mm_store_ps(bb, _mm_min_ps(_mm_load_ps(bbj),
 +                                _mm_load_ps(bbj+NNBSBB_B)));
 +    _mm_store_ps(bb+NNBSBB_C, _mm_max_ps(_mm_load_ps(bbj+NNBSBB_C),
 +                                         _mm_load_ps(bbj+NNBSBB_B+NNBSBB_C)));
 +}
 +
 +/* Coordinate order xyz, bb order xxxxyyyyzzzz */
 +static void calc_bounding_box_xxxx(int na, int stride, const real *x, float *bb)
 +{
 +    int  i, j;
 +    real xl, xh, yl, yh, zl, zh;
 +
 +    i  = 0;
 +    xl = x[i+XX];
 +    xh = x[i+XX];
 +    yl = x[i+YY];
 +    yh = x[i+YY];
 +    zl = x[i+ZZ];
 +    zh = x[i+ZZ];
 +    i += stride;
 +    for (j = 1; j < na; j++)
 +    {
 +        xl = min(xl, x[i+XX]);
 +        xh = max(xh, x[i+XX]);
 +        yl = min(yl, x[i+YY]);
 +        yh = max(yh, x[i+YY]);
 +        zl = min(zl, x[i+ZZ]);
 +        zh = max(zh, x[i+ZZ]);
 +        i += stride;
 +    }
 +    /* Note: possible double to float conversion here */
 +    bb[0*STRIDE_PBB] = R2F_D(xl);
 +    bb[1*STRIDE_PBB] = R2F_D(yl);
 +    bb[2*STRIDE_PBB] = R2F_D(zl);
 +    bb[3*STRIDE_PBB] = R2F_U(xh);
 +    bb[4*STRIDE_PBB] = R2F_U(yh);
 +    bb[5*STRIDE_PBB] = R2F_U(zh);
 +}
 +
 +#endif /* NBNXN_SEARCH_BB_SSE */
 +
 +#ifdef NBNXN_SEARCH_SSE_SINGLE
 +
 +/* Coordinate order xyz?, bb order xyz0 */
 +static void calc_bounding_box_sse(int na, const float *x, float *bb)
 +{
 +    __m128 bb_0_SSE, bb_1_SSE;
 +    __m128 x_SSE;
 +
 +    int    i;
 +
 +    bb_0_SSE = _mm_load_ps(x);
 +    bb_1_SSE = bb_0_SSE;
 +
 +    for (i = 1; i < na; i++)
 +    {
 +        x_SSE    = _mm_load_ps(x+i*NNBSBB_C);
 +        bb_0_SSE = _mm_min_ps(bb_0_SSE, x_SSE);
 +        bb_1_SSE = _mm_max_ps(bb_1_SSE, x_SSE);
 +    }
 +
 +    _mm_store_ps(bb, bb_0_SSE);
 +    _mm_store_ps(bb+4, bb_1_SSE);
 +}
 +
 +/* Computes the bounding box of na atoms with calc_bounding_box_sse()
 + * into the scratch buffer bb_work and then scatters the six corner
 + * values into bb, STRIDE_PBB elements apart.
 + * Coordinate order xyz?, bb order xxxxyyyyzzzz.
 + * bb_work is written with _mm_store_ps() at bb_work and bb_work+4 by
 + * the callee, so it has to provide 8 floats.
 + * NOTE(review): presumably bb_work must be 16-byte aligned for those
 + * stores - confirm at the call sites.
 + */
 +static void calc_bounding_box_xxxx_sse(int na, const float *x,
 +                                       float *bb_work,
 +                                       real *bb)
 +{
 +    calc_bounding_box_sse(na, x, bb_work);
 +
 +    bb[0*STRIDE_PBB] = bb_work[BBL_X];
 +    bb[1*STRIDE_PBB] = bb_work[BBL_Y];
 +    bb[2*STRIDE_PBB] = bb_work[BBL_Z];
 +    bb[3*STRIDE_PBB] = bb_work[BBU_X];
 +    bb[4*STRIDE_PBB] = bb_work[BBU_Y];
 +    bb[5*STRIDE_PBB] = bb_work[BBU_Z];
 +}
 +
 +#endif /* NBNXN_SEARCH_SSE_SINGLE */
 +
 +#ifdef NBNXN_SEARCH_BB_SSE
 +
 +/* Combines pairs of consecutive bounding boxes.
 + * For every x,y column of the grid, each pair of consecutive boxes
 + * in bb is merged (element-wise minimum of the two lower corners,
 + * element-wise maximum of the two upper corners) and the result is
 + * stored in grid->bbj. When a column holds an odd number of boxes,
 + * the last box is copied to grid->bbj unchanged.
 + * Each box occupies two groups of NNBSBB_C floats in bb: lower corner
 + * first, upper corner second.
 + */
 +static void combine_bounding_box_pairs(nbnxn_grid_t *grid, const float *bb)
 +{
 +    int    i, j, sc2, nc2, c2;
 +    __m128 min_SSE, max_SSE;
 +
 +    for (i = 0; i < grid->ncx*grid->ncy; i++)
 +    {
 +        /* Starting bb in a column is expected to be 2-aligned */
 +        sc2 = grid->cxy_ind[i]>>1;
 +        /* For odd numbers skip the last bb here.
 +         * (cxy_na+3)>>2 is the atom count divided by 4, rounded up;
 +         * the extra shift by 1 halves that, rounding down.
 +         */
 +        nc2 = (grid->cxy_na[i]+3)>>(2+1);
 +        for (c2 = sc2; c2 < sc2+nc2; c2++)
 +        {
 +            min_SSE = _mm_min_ps(_mm_load_ps(bb+(c2*4+0)*NNBSBB_C),
 +                                 _mm_load_ps(bb+(c2*4+2)*NNBSBB_C));
 +            max_SSE = _mm_max_ps(_mm_load_ps(bb+(c2*4+1)*NNBSBB_C),
 +                                 _mm_load_ps(bb+(c2*4+3)*NNBSBB_C));
 +            _mm_store_ps(grid->bbj+(c2*2+0)*NNBSBB_C, min_SSE);
 +            _mm_store_ps(grid->bbj+(c2*2+1)*NNBSBB_C, max_SSE);
 +        }
 +        if (((grid->cxy_na[i]+3)>>2) & 1)
 +        {
 +            /* Copy the last bb for odd bb count in this column.
 +             * After the loop above c2 equals sc2+nc2, which is the
 +             * pair index of the unpaired box.
 +             */
 +            for (j = 0; j < NNBSBB_C; j++)
 +            {
 +                grid->bbj[(c2*2+0)*NNBSBB_C+j] = bb[(c2*4+0)*NNBSBB_C+j];
 +                grid->bbj[(c2*2+1)*NNBSBB_C+j] = bb[(c2*4+1)*NNBSBB_C+j];
 +            }
 +        }
 +    }
 +}
 +
 +#endif
 +
 +
 +/* Prints the average bb size, used for debug output */
 +static void print_bbsizes_simple(FILE                *fp,
 +                                 const nbnxn_search_t nbs,
 +                                 const nbnxn_grid_t  *grid)
 +{
 +    int  c, d;
 +    dvec ba;
 +
 +    clear_dvec(ba);
 +    for (c = 0; c < grid->nc; c++)
 +    {
 +        for (d = 0; d < DIM; d++)
 +        {
 +            ba[d] += grid->bb[c*NNBSBB_B+NNBSBB_C+d] - grid->bb[c*NNBSBB_B+d];
 +        }
 +    }
 +    dsvmul(1.0/grid->nc, ba, ba);
 +
 +    fprintf(fp, "ns bb: %4.2f %4.2f %4.2f  %4.2f %4.2f %4.2f rel %4.2f %4.2f %4.2f\n",
 +            nbs->box[XX][XX]/grid->ncx,
 +            nbs->box[YY][YY]/grid->ncy,
 +            nbs->box[ZZ][ZZ]*grid->ncx*grid->ncy/grid->nc,
 +            ba[XX], ba[YY], ba[ZZ],
 +            ba[XX]*grid->ncx/nbs->box[XX][XX],
 +            ba[YY]*grid->ncy/nbs->box[YY][YY],
 +            ba[ZZ]*grid->nc/(grid->ncx*grid->ncy*nbs->box[ZZ][ZZ]));
 +}
 +
 +/* Prints the average bb size, used for debug output */
 +static void print_bbsizes_supersub(FILE                *fp,
 +                                   const nbnxn_search_t nbs,
 +                                   const nbnxn_grid_t  *grid)
 +{
 +    int  ns, c, s;
 +    dvec ba;
 +
 +    clear_dvec(ba);
 +    ns = 0;
 +    for (c = 0; c < grid->nc; c++)
 +    {
 +#ifdef NBNXN_BBXXXX
 +        for (s = 0; s < grid->nsubc[c]; s += STRIDE_PBB)
 +        {
 +            int cs_w, i, d;
 +
 +            cs_w = (c*GPU_NSUBCELL + s)/STRIDE_PBB;
 +            for (i = 0; i < STRIDE_PBB; i++)
 +            {
 +                for (d = 0; d < DIM; d++)
 +                {
 +                    ba[d] +=
 +                        grid->bb[cs_w*NNBSBB_XXXX+(DIM+d)*STRIDE_PBB+i] -
 +                        grid->bb[cs_w*NNBSBB_XXXX+     d *STRIDE_PBB+i];
 +                }
 +            }
 +        }
 +#else
 +        for (s = 0; s < grid->nsubc[c]; s++)
 +        {
 +            int cs, d;
 +
 +            cs = c*GPU_NSUBCELL + s;
 +            for (d = 0; d < DIM; d++)
 +            {
 +                ba[d] +=
 +                    grid->bb[cs*NNBSBB_B+NNBSBB_C+d] -
 +                    grid->bb[cs*NNBSBB_B         +d];
 +            }
 +        }
 +#endif
 +        ns += grid->nsubc[c];
 +    }
 +    dsvmul(1.0/ns, ba, ba);
 +
 +    fprintf(fp, "ns bb: %4.2f %4.2f %4.2f  %4.2f %4.2f %4.2f rel %4.2f %4.2f %4.2f\n",
 +            nbs->box[XX][XX]/(grid->ncx*GPU_NSUBCELL_X),
 +            nbs->box[YY][YY]/(grid->ncy*GPU_NSUBCELL_Y),
 +            nbs->box[ZZ][ZZ]*grid->ncx*grid->ncy/(grid->nc*GPU_NSUBCELL_Z),
 +            ba[XX], ba[YY], ba[ZZ],
 +            ba[XX]*grid->ncx*GPU_NSUBCELL_X/nbs->box[XX][XX],
 +            ba[YY]*grid->ncy*GPU_NSUBCELL_Y/nbs->box[YY][YY],
 +            ba[ZZ]*grid->nc*GPU_NSUBCELL_Z/(grid->ncx*grid->ncy*nbs->box[ZZ][ZZ]));
 +}
 +
 +/* Potentially sorts atoms on LJ coefficients !=0 and ==0.
 + * Also sets interaction flags.
 + */
 +void sort_on_lj(nbnxn_atomdata_t *nbat, int na_c,
 +                int a0, int a1, const int *atinfo,
 +                int *order,
 +                int *flags)
 +{
 +    int      subc, s, a, n1, n2, a_lj_max, i, j;
 +    int      sort1[NBNXN_NA_SC_MAX/GPU_NSUBCELL];
 +    int      sort2[NBNXN_NA_SC_MAX/GPU_NSUBCELL];
 +    gmx_bool haveQ;
 +
 +    *flags = 0;
 +
 +    subc = 0;
 +    for (s = a0; s < a1; s += na_c)
 +    {
 +        /* Make lists for this (sub-)cell on atoms with and without LJ */
 +        n1       = 0;
 +        n2       = 0;
 +        haveQ    = FALSE;
 +        a_lj_max = -1;
 +        for (a = s; a < min(s+na_c, a1); a++)
 +        {
 +            haveQ = haveQ || GET_CGINFO_HAS_Q(atinfo[order[a]]);
 +
 +            if (GET_CGINFO_HAS_VDW(atinfo[order[a]]))
 +            {
 +                sort1[n1++] = order[a];
 +                a_lj_max    = a;
 +            }
 +            else
 +            {
 +                sort2[n2++] = order[a];
 +            }
 +        }
 +
 +        /* If we don't have atom with LJ, there's nothing to sort */
 +        if (n1 > 0)
 +        {
 +            *flags |= NBNXN_CI_DO_LJ(subc);
 +
 +            if (2*n1 <= na_c)
 +            {
 +                /* Only sort when strictly necessary. Ordering particles
 +                 * Ordering particles can lead to less accurate summation
 +                 * due to rounding, both for LJ and Coulomb interactions.
 +                 */
 +                if (2*(a_lj_max - s) >= na_c)
 +                {
 +                    for (i = 0; i < n1; i++)
 +                    {
 +                        order[a0+i] = sort1[i];
 +                    }
 +                    for (j = 0; j < n2; j++)
 +                    {
 +                        order[a0+n1+j] = sort2[j];
 +                    }
 +                }
 +
 +                *flags |= NBNXN_CI_HALF_LJ(subc);
 +            }
 +        }
 +        if (haveQ)
 +        {
 +            *flags |= NBNXN_CI_DO_COUL(subc);
 +        }
 +        subc++;
 +    }
 +}
 +
 +/* Fill a pair search cell with atoms.
 + * Potentially sorts atoms and sets the interaction flags.
 + *
 + * Works on the atoms at grid positions a0 ... a1-1 (indices into nbs->a):
 + *  - for simple grids, calls sort_on_lj() on that range, which may
 + *    reorder nbs->a and sets the flags entry of this cell;
 + *  - stores in nbs->cell, for each atom, its position in the grid;
 + *  - copies the coordinates from x into nbat->x with
 + *    copy_rvec_to_nbat_real();
 + *  - computes the bounding box of the a1-a0 atoms and stores it in
 + *    grid->bb, in a layout that depends on nbat->XFormat and on
 + *    whether the grid is simple.
 + * sx, sy, sz are forwarded to copy_rvec_to_nbat_real() and printed in
 + * the debug output. bb_work is only used as scratch space in the
 + * NBNXN_SEARCH_SSE_SINGLE path for calc_bounding_box_xxxx_sse().
 + */
 +void fill_cell(const nbnxn_search_t nbs,
 +               nbnxn_grid_t *grid,
 +               nbnxn_atomdata_t *nbat,
 +               int a0, int a1,
 +               const int *atinfo,
 +               rvec *x,
 +               int sx, int sy, int sz,
 +               float *bb_work)
 +{
 +    int     na, a;
 +    size_t  offset;
 +    float  *bb_ptr;
 +
 +    na = a1 - a0; /* number of atoms in this cell */
 +
 +    if (grid->bSimple)
 +    {
 +        sort_on_lj(nbat, grid->na_c, a0, a1, atinfo, nbs->a,
 +                   grid->flags+(a0>>grid->na_c_2log)-grid->cell0);
 +    }
 +
 +    /* Now we have sorted the atoms, set the cell indices */
 +    for (a = a0; a < a1; a++)
 +    {
 +        nbs->cell[nbs->a[a]] = a;
 +    }
 +
 +    copy_rvec_to_nbat_real(nbs->a+a0, a1-a0, grid->na_c, x,
 +                           nbat->XFormat, nbat->x, a0,
 +                           sx, sy, sz);
 +
 +    if (nbat->XFormat == nbatX4)
 +    {
 +        /* Store the bounding boxes as xyz.xyz.
 +         * offset is the position of this cell's box in grid->bb,
 +         * counted from the first cell of this grid.
 +         */
 +        offset = ((a0 - grid->cell0*grid->na_sc)>>grid->na_c_2log)*NNBSBB_B;
 +        bb_ptr = grid->bb + offset;
 +
 +#if defined GMX_DOUBLE && defined NBNXN_SEARCH_BB_SSE
 +        if (2*grid->na_cj == grid->na_c)
 +        {
 +            calc_bounding_box_x_x4_halves(na, nbat->x+X4_IND_A(a0), bb_ptr,
 +                                          grid->bbj+offset*2);
 +        }
 +        else
 +#endif
 +        {
 +            calc_bounding_box_x_x4(na, nbat->x+X4_IND_A(a0), bb_ptr);
 +        }
 +    }
 +    else if (nbat->XFormat == nbatX8)
 +    {
 +        /* Store the bounding boxes as xyz.xyz. */
 +        offset = ((a0 - grid->cell0*grid->na_sc)>>grid->na_c_2log)*NNBSBB_B;
 +        bb_ptr = grid->bb + offset;
 +
 +        calc_bounding_box_x_x8(na, nbat->x+X8_IND_A(a0), bb_ptr);
 +    }
 +#ifdef NBNXN_BBXXXX
 +    else if (!grid->bSimple)
 +    {
 +        /* Store the bounding boxes in a format convenient
 +         * for SSE calculations: xxxxyyyyzzzz...
 +         * The first term selects the pack of STRIDE_PBB boxes,
 +         * the second term the position of this box within the pack.
 +         */
 +        bb_ptr =
 +            grid->bb +
 +            ((a0-grid->cell0*grid->na_sc)>>(grid->na_c_2log+STRIDE_PBB_2LOG))*NNBSBB_XXXX +
 +            (((a0-grid->cell0*grid->na_sc)>>grid->na_c_2log) & (STRIDE_PBB-1));
 +
 +#ifdef NBNXN_SEARCH_SSE_SINGLE
 +        if (nbat->XFormat == nbatXYZQ)
 +        {
 +            calc_bounding_box_xxxx_sse(na, nbat->x+a0*nbat->xstride,
 +                                       bb_work, bb_ptr);
 +        }
 +        else
 +#endif
 +        {
 +            calc_bounding_box_xxxx(na, nbat->xstride, nbat->x+a0*nbat->xstride,
 +                                   bb_ptr);
 +        }
 +        if (gmx_debug_at)
 +        {
 +            fprintf(debug, "%2d %2d %2d bb %5.2f %5.2f %5.2f %5.2f %5.2f %5.2f\n",
 +                    sx, sy, sz,
 +                    bb_ptr[0*STRIDE_PBB], bb_ptr[3*STRIDE_PBB],
 +                    bb_ptr[1*STRIDE_PBB], bb_ptr[4*STRIDE_PBB],
 +                    bb_ptr[2*STRIDE_PBB], bb_ptr[5*STRIDE_PBB]);
 +        }
 +    }
 +#endif
 +    else
 +    {
 +        /* Store the bounding boxes as xyz.xyz. */
 +        bb_ptr = grid->bb+((a0-grid->cell0*grid->na_sc)>>grid->na_c_2log)*NNBSBB_B;
 +
 +        calc_bounding_box(na, nbat->xstride, nbat->x+a0*nbat->xstride,
 +                          bb_ptr);
 +
 +        if (gmx_debug_at)
 +        {
 +            int bbo;
 +            bbo = (a0 - grid->cell0*grid->na_sc)/grid->na_c;
 +            fprintf(debug, "%2d %2d %2d bb %5.2f %5.2f %5.2f %5.2f %5.2f %5.2f\n",
 +                    sx, sy, sz,
 +                    (grid->bb+bbo*NNBSBB_B)[BBL_X],
 +                    (grid->bb+bbo*NNBSBB_B)[BBU_X],
 +                    (grid->bb+bbo*NNBSBB_B)[BBL_Y],
 +                    (grid->bb+bbo*NNBSBB_B)[BBU_Y],
 +                    (grid->bb+bbo*NNBSBB_B)[BBL_Z],
 +                    (grid->bb+bbo*NNBSBB_B)[BBU_Z]);
 +        }
 +    }
 +}
 +
 +/* Spatially sort the atoms within one grid column.
 + * Simple-grid variant. For every x,y column cxy_start <= cxy < cxy_end:
 + *  - sorts the column's atoms in nbs->a along z with sort_atoms(),
 + *  - fills the column's cells with fill_cell(), grid->na_sc atoms
 + *    per cell,
 + *  - copies two elements of each cell's bounding box into grid->bbcz,
 + *  - sets the unused entries of nbs->a in this column to -1.
 + * a0 and a1 are only used in the debug output. sort_work is the
 + * scratch array handed to sort_atoms().
 + */
 +static void sort_columns_simple(const nbnxn_search_t nbs,
 +                                int dd_zone,
 +                                nbnxn_grid_t *grid,
 +                                int a0, int a1,
 +                                const int *atinfo,
 +                                rvec *x,
 +                                nbnxn_atomdata_t *nbat,
 +                                int cxy_start, int cxy_end,
 +                                int *sort_work)
 +{
 +    int  cxy;
 +    int  cx, cy, cz, ncz, cfilled, c;
 +    int  na, ash, ind, a;
 +    int  na_c, ash_c;
 +
 +    if (debug)
 +    {
 +        fprintf(debug, "cell0 %d sorting columns %d - %d, atoms %d - %d\n",
 +                grid->cell0, cxy_start, cxy_end, a0, a1);
 +    }
 +
 +    /* Sort the atoms within each x,y column in 3 dimensions */
 +    for (cxy = cxy_start; cxy < cxy_end; cxy++)
 +    {
 +        cx = cxy/grid->ncy;
 +        cy = cxy - cx*grid->ncy;
 +
 +        na  = grid->cxy_na[cxy]; /* atoms in this column */
 +        ncz = grid->cxy_ind[cxy+1] - grid->cxy_ind[cxy]; /* cells in this column */
 +        ash = (grid->cell0 + grid->cxy_ind[cxy])*grid->na_sc; /* first slot of the column in nbs->a */
 +
 +        /* Sort the atoms within each x,y column on z coordinate */
 +        sort_atoms(ZZ, FALSE,
 +                   nbs->a+ash, na, x,
 +                   grid->c0[ZZ],
 +                   ncz*grid->na_sc*SORT_GRID_OVERSIZE/nbs->box[ZZ][ZZ],
 +                   ncz*grid->na_sc*SGSF, sort_work);
 +
 +        /* Fill the ncz cells in this column */
 +        cfilled = grid->cxy_ind[cxy];
 +        for (cz = 0; cz < ncz; cz++)
 +        {
 +            c  = grid->cxy_ind[cxy] + cz;
 +
 +            ash_c = ash + cz*grid->na_sc;
 +            na_c  = min(grid->na_sc, na-(ash_c-ash));
 +
 +            fill_cell(nbs, grid, nbat,
 +                      ash_c, ash_c+na_c, atinfo, x,
 +                      grid->na_sc*cx + (dd_zone >> 2),
 +                      grid->na_sc*cy + (dd_zone & 3),
 +                      grid->na_sc*cz,
 +                      NULL);
 +
 +            /* This copy to bbcz is not really necessary.
 +             * But it allows to use the same grid search code
 +             * for the simple and supersub cell setups.
 +             * cfilled stays at the last cell that received atoms,
 +             * so a cell without atoms gets the values of that cell.
 +             * NOTE(review): the hard-coded offsets 2 and 6 presumably
 +             * select the lower and upper z of the xyz0 box - confirm
 +             * against BBL_Z/BBU_Z.
 +             */
 +            if (na_c > 0)
 +            {
 +                cfilled = c;
 +            }
 +            grid->bbcz[c*NNBSBB_D  ] = grid->bb[cfilled*NNBSBB_B+2];
 +            grid->bbcz[c*NNBSBB_D+1] = grid->bb[cfilled*NNBSBB_B+6];
 +        }
 +
 +        /* Set the unused atom indices to -1 */
 +        for (ind = na; ind < ncz*grid->na_sc; ind++)
 +        {
 +            nbs->a[ash+ind] = -1;
 +        }
 +    }
 +}
 +
 +/* Spatially sort the atoms within one grid column.
 + * Super/sub-cell variant. For every x,y column cxy_start <= cxy < cxy_end
 + * the atoms in nbs->a are sorted along z with sort_atoms(); then,
 + * depending on GPU_NSUBCELL_Y and GPU_NSUBCELL_X, each z-slab is sorted
 + * along y and each y-slab along x, and every resulting sub-cell of
 + * grid->na_c atoms is filled with fill_cell(). Per super-cell the number
 + * of sub-cells (grid->nsubc) and the z-range (grid->bbcz) are stored.
 + * Finally the unused entries of nbs->a in the column are set to -1.
 + * a0 and a1 are only used in the debug output. sort_work is the
 + * scratch array handed to sort_atoms().
 + */
 +static void sort_columns_supersub(const nbnxn_search_t nbs,
 +                                  int dd_zone,
 +                                  nbnxn_grid_t *grid,
 +                                  int a0, int a1,
 +                                  const int *atinfo,
 +                                  rvec *x,
 +                                  nbnxn_atomdata_t *nbat,
 +                                  int cxy_start, int cxy_end,
 +                                  int *sort_work)
 +{
 +    int  cxy;
 +    int  cx, cy, cz = -1, c = -1, ncz;
 +    int  na, ash, na_c, ind, a;
 +    int  subdiv_z, sub_z, na_z, ash_z;
 +    int  subdiv_y, sub_y, na_y, ash_y;
 +    int  subdiv_x, sub_x, na_x, ash_x;
 +
 +    /* cppcheck-suppress unassignedVariable */
 +    float bb_work_array[NNBSBB_B+3], *bb_work_align;
 +    /* Round the address of bb_work_array+3 down to a multiple of 16 bytes */
 +    bb_work_align = (float *)(((size_t)(bb_work_array+3)) & (~((size_t)15)));
 +
 +    if (debug)
 +    {
 +        fprintf(debug, "cell0 %d sorting columns %d - %d, atoms %d - %d\n",
 +                grid->cell0, cxy_start, cxy_end, a0, a1);
 +    }
 +
 +    /* Number of atoms per slab along x, y and z */
 +    subdiv_x = grid->na_c;
 +    subdiv_y = GPU_NSUBCELL_X*subdiv_x;
 +    subdiv_z = GPU_NSUBCELL_Y*subdiv_y;
 +
 +    /* Sort the atoms within each x,y column in 3 dimensions */
 +    for (cxy = cxy_start; cxy < cxy_end; cxy++)
 +    {
 +        cx = cxy/grid->ncy;
 +        cy = cxy - cx*grid->ncy;
 +
 +        na  = grid->cxy_na[cxy]; /* atoms in this column */
 +        ncz = grid->cxy_ind[cxy+1] - grid->cxy_ind[cxy]; /* super-cells in this column */
 +        ash = (grid->cell0 + grid->cxy_ind[cxy])*grid->na_sc; /* first slot of the column in nbs->a */
 +
 +        /* Sort the atoms within each x,y column on z coordinate */
 +        sort_atoms(ZZ, FALSE,
 +                   nbs->a+ash, na, x,
 +                   grid->c0[ZZ],
 +                   ncz*grid->na_sc*SORT_GRID_OVERSIZE/nbs->box[ZZ][ZZ],
 +                   ncz*grid->na_sc*SGSF, sort_work);
 +
 +        /* This loop goes over the supercells and subcells along z at once */
 +        for (sub_z = 0; sub_z < ncz*GPU_NSUBCELL_Z; sub_z++)
 +        {
 +            ash_z = ash + sub_z*subdiv_z;
 +            na_z  = min(subdiv_z, na-(ash_z-ash));
 +
 +            /* We have already sorted on z */
 +
 +            if (sub_z % GPU_NSUBCELL_Z == 0)
 +            {
 +                cz = sub_z/GPU_NSUBCELL_Z;
 +                c  = grid->cxy_ind[cxy] + cz;
 +
 +                /* The number of atoms in this supercell */
 +                na_c = min(grid->na_sc, na-(ash_z-ash));
 +
 +                grid->nsubc[c] = min(GPU_NSUBCELL, (na_c+grid->na_c-1)/grid->na_c);
 +
 +                /* Store the z-boundaries of the super cell */
 +                grid->bbcz[c*NNBSBB_D  ] = x[nbs->a[ash_z]][ZZ];
 +                grid->bbcz[c*NNBSBB_D+1] = x[nbs->a[ash_z+na_c-1]][ZZ];
 +            }
 +
 +#if GPU_NSUBCELL_Y > 1
 +            /* Sort the atoms along y */
 +            sort_atoms(YY, (sub_z & 1),
 +                       nbs->a+ash_z, na_z, x,
-                            grid->c0[XX]+cx*grid->sx, grid->inv_sx,
++                       grid->c0[YY]+cy*grid->sy,
++                       subdiv_y*SORT_GRID_OVERSIZE*grid->inv_sy,
 +                       subdiv_y*SGSF, sort_work);
 +#endif
 +
 +            for (sub_y = 0; sub_y < GPU_NSUBCELL_Y; sub_y++)
 +            {
 +                ash_y = ash_z + sub_y*subdiv_y;
 +                na_y  = min(subdiv_y, na-(ash_y-ash));
 +
 +#if GPU_NSUBCELL_X > 1
 +                /* Sort the atoms along x */
 +                sort_atoms(XX, ((cz*GPU_NSUBCELL_Y + sub_y) & 1),
 +                           nbs->a+ash_y, na_y, x,
++                           grid->c0[XX]+cx*grid->sx,
++                           subdiv_x*SORT_GRID_OVERSIZE*grid->inv_sx,
 +                           subdiv_x*SGSF, sort_work);
 +#endif
 +
 +                for (sub_x = 0; sub_x < GPU_NSUBCELL_X; sub_x++)
 +                {
 +                    ash_x = ash_y + sub_x*subdiv_x;
 +                    na_x  = min(subdiv_x, na-(ash_x-ash));
 +
 +                    fill_cell(nbs, grid, nbat,
 +                              ash_x, ash_x+na_x, atinfo, x,
 +                              grid->na_c*(cx*GPU_NSUBCELL_X+sub_x) + (dd_zone >> 2),
 +                              grid->na_c*(cy*GPU_NSUBCELL_Y+sub_y) + (dd_zone & 3),
 +                              grid->na_c*sub_z,
 +                              bb_work_align);
 +                }
 +            }
 +        }
 +
 +        /* Set the unused atom indices to -1 */
 +        for (ind = na; ind < ncz*grid->na_sc; ind++)
 +        {
 +            nbs->a[ash+ind] = -1;
 +        }
 +    }
 +}
 +
 +/* Determine in which grid column atoms should go.
 + * Thread number thread out of nthread handles its share of the atoms
 + * a0 ... a1-1. For each of those atoms the x,y column index
 + * cx*grid->ncy + cy is stored in cell[] and the per-column counter in
 + * cxy_na[] is incremented; cxy_na[] (grid->ncx*grid->ncy+1 entries) is
 + * cleared first.
 + * In the home zone (dd_zone == 0) atoms with move[i] < 0 are put in
 + * the extra column with index grid->ncx*grid->ncy; move may be NULL.
 + * In other zones the column indices are clamped to the grid range.
 + */
 +static void calc_column_indices(nbnxn_grid_t *grid,
 +                                int a0, int a1,
 +                                rvec *x,
 +                                int dd_zone, const int *move,
 +                                int thread, int nthread,
 +                                int *cell,
 +                                int *cxy_na)
 +{
 +    int  n0, n1, i;
 +    int  cx, cy;
 +
 +    /* We add one extra cell for particles which moved during DD */
 +    for (i = 0; i < grid->ncx*grid->ncy+1; i++)
 +    {
 +        cxy_na[i] = 0;
 +    }
 +
 +    n0 = a0 + (int)((thread+0)*(a1 - a0))/nthread; /* first atom for this thread */
 +    n1 = a0 + (int)((thread+1)*(a1 - a0))/nthread; /* one past the last atom for this thread */
 +    if (dd_zone == 0)
 +    {
 +        /* Home zone */
 +        for (i = n0; i < n1; i++)
 +        {
 +            if (move == NULL || move[i] >= 0)
 +            {
 +                /* We need to be careful with rounding,
 +                 * particles might be a few bits outside the local zone.
 +                 * The int cast takes care of the lower bound,
 +                 * we will explicitly take care of the upper bound.
 +                 */
 +                cx = (int)((x[i][XX] - grid->c0[XX])*grid->inv_sx);
 +                cy = (int)((x[i][YY] - grid->c0[YY])*grid->inv_sy);
 +
 +#ifdef DEBUG_NBNXN_GRIDDING
 +                if (cx < 0 || cx >= grid->ncx ||
 +                    cy < 0 || cy >= grid->ncy)
 +                {
 +                    gmx_fatal(FARGS,
 +                              "grid cell cx %d cy %d out of range (max %d %d)\n"
 +                              "atom %f %f %f, grid->c0 %f %f",
 +                              cx, cy, grid->ncx, grid->ncy,
 +                              x[i][XX], x[i][YY], x[i][ZZ], grid->c0[XX], grid->c0[YY]);
 +                }
 +#endif
 +                /* Take care of potential rounding issues */
 +                cx = min(cx, grid->ncx - 1);
 +                cy = min(cy, grid->ncy - 1);
 +
 +                /* For the moment cell will contain only the, grid local,
 +                 * x and y indices, not z.
 +                 */
 +                cell[i] = cx*grid->ncy + cy;
 +            }
 +            else
 +            {
 +                /* Put this moved particle after the end of the grid,
 +                 * so we can process it later without using conditionals.
 +                 */
 +                cell[i] = grid->ncx*grid->ncy;
 +            }
 +
 +            cxy_na[cell[i]]++;
 +        }
 +    }
 +    else
 +    {
 +        /* Non-home zone */
 +        for (i = n0; i < n1; i++)
 +        {
 +            cx = (int)((x[i][XX] - grid->c0[XX])*grid->inv_sx);
 +            cy = (int)((x[i][YY] - grid->c0[YY])*grid->inv_sy);
 +
 +            /* For non-home zones there could be particles outside
 +             * the non-bonded cut-off range, which have been communicated
 +             * for bonded interactions only. For the result it doesn't
 +             * matter where these end up on the grid. For performance
 +             * we put them in an extra row at the border.
 +             */
 +            cx = max(cx, 0);
 +            cx = min(cx, grid->ncx - 1);
 +            cy = max(cy, 0);
 +            cy = min(cy, grid->ncy - 1);
 +
 +            /* For the moment cell will contain only the, grid local,
 +             * x and y indices, not z.
 +             */
 +            cell[i] = cx*grid->ncy + cy;
 +
 +            cxy_na[cell[i]]++;
 +        }
 +    }
 +}
 +
 +/* Determine in which grid cells the atoms should go */
 +static void calc_cell_indices(const nbnxn_search_t nbs,
 +                              int dd_zone,
 +                              nbnxn_grid_t *grid,
 +                              int a0, int a1,
 +                              const int *atinfo,
 +                              rvec *x,
 +                              const int *move,
 +                              nbnxn_atomdata_t *nbat)
 +{
 +    int   n0, n1, i;
 +    int   cx, cy, cxy, ncz_max, ncz;
 +    int   nthread, thread;
 +    int  *cxy_na, cxy_na_i;
 +
 +    nthread = gmx_omp_nthreads_get(emntPairsearch);
 +
 +#pragma omp parallel for num_threads(nthread) schedule(static)
 +    for (thread = 0; thread < nthread; thread++)
 +    {
 +        calc_column_indices(grid, a0, a1, x, dd_zone, move, thread, nthread,
 +                            nbs->cell, nbs->work[thread].cxy_na);
 +    }
 +
 +    /* Make the cell index as a function of x and y */
 +    ncz_max          = 0;
 +    ncz              = 0;
 +    grid->cxy_ind[0] = 0;
 +    for (i = 0; i < grid->ncx*grid->ncy+1; i++)
 +    {
 +        /* We set ncz_max at the beginning of the loop iso at the end
 +         * to skip i=grid->ncx*grid->ncy which are moved particles
 +         * that do not need to be ordered on the grid.
 +         */
 +        if (ncz > ncz_max)
 +        {
 +            ncz_max = ncz;
 +        }
 +        cxy_na_i = nbs->work[0].cxy_na[i];
 +        for (thread = 1; thread < nthread; thread++)
 +        {
 +            cxy_na_i += nbs->work[thread].cxy_na[i];
 +        }
 +        ncz = (cxy_na_i + grid->na_sc - 1)/grid->na_sc;
 +        if (nbat->XFormat == nbatX8)
 +        {
 +            /* Make the number of cell a multiple of 2 */
 +            ncz = (ncz + 1) & ~1;
 +        }
 +        grid->cxy_ind[i+1] = grid->cxy_ind[i] + ncz;
 +        /* Clear cxy_na, so we can reuse the array below */
 +        grid->cxy_na[i] = 0;
 +    }
 +    grid->nc = grid->cxy_ind[grid->ncx*grid->ncy] - grid->cxy_ind[0];
 +
 +    nbat->natoms = (grid->cell0 + grid->nc)*grid->na_sc;
 +
 +    if (debug)
 +    {
 +        fprintf(debug, "ns na_sc %d na_c %d super-cells: %d x %d y %d z %.1f maxz %d\n",
 +                grid->na_sc, grid->na_c, grid->nc,
 +                grid->ncx, grid->ncy, grid->nc/((double)(grid->ncx*grid->ncy)),
 +                ncz_max);
 +        if (gmx_debug_at)
 +        {
 +            i = 0;
 +            for (cy = 0; cy < grid->ncy; cy++)
 +            {
 +                for (cx = 0; cx < grid->ncx; cx++)
 +                {
 +                    fprintf(debug, " %2d", grid->cxy_ind[i+1]-grid->cxy_ind[i]);
 +                    i++;
 +                }
 +                fprintf(debug, "\n");
 +            }
 +        }
 +    }
 +
 +    /* Make sure the work array for sorting is large enough */
 +    if (ncz_max*grid->na_sc*SGSF > nbs->work[0].sort_work_nalloc)
 +    {
 +        for (thread = 0; thread < nbs->nthread_max; thread++)
 +        {
 +            nbs->work[thread].sort_work_nalloc =
 +                over_alloc_large(ncz_max*grid->na_sc*SGSF);
 +            srenew(nbs->work[thread].sort_work,
 +                   nbs->work[thread].sort_work_nalloc);
 +            /* When not in use, all elements should be -1 */
 +            for (i = 0; i < nbs->work[thread].sort_work_nalloc; i++)
 +            {
 +                nbs->work[thread].sort_work[i] = -1;
 +            }
 +        }
 +    }
 +
 +    /* Now we know the dimensions we can fill the grid.
 +     * This is the first, unsorted fill. We sort the columns after this.
 +     */
 +    for (i = a0; i < a1; i++)
 +    {
 +        /* At this point nbs->cell contains the local grid x,y indices */
 +        cxy = nbs->cell[i];
 +        nbs->a[(grid->cell0 + grid->cxy_ind[cxy])*grid->na_sc + grid->cxy_na[cxy]++] = i;
 +    }
 +
 +    if (dd_zone == 0)
 +    {
 +        /* Set the cell indices for the moved particles */
 +        n0 = grid->nc*grid->na_sc;
 +        n1 = grid->nc*grid->na_sc+grid->cxy_na[grid->ncx*grid->ncy];
 +        if (dd_zone == 0)
 +        {
 +            for (i = n0; i < n1; i++)
 +            {
 +                nbs->cell[nbs->a[i]] = i;
 +            }
 +        }
 +    }
 +
 +    /* Sort the super-cell columns along z into the sub-cells. */
 +#pragma omp parallel for num_threads(nbs->nthread_max) schedule(static)
 +    for (thread = 0; thread < nbs->nthread_max; thread++)
 +    {
 +        if (grid->bSimple)
 +        {
 +            sort_columns_simple(nbs, dd_zone, grid, a0, a1, atinfo, x, nbat,
 +                                ((thread+0)*grid->ncx*grid->ncy)/nthread,
 +                                ((thread+1)*grid->ncx*grid->ncy)/nthread,
 +                                nbs->work[thread].sort_work);
 +        }
 +        else
 +        {
 +            sort_columns_supersub(nbs, dd_zone, grid, a0, a1, atinfo, x, nbat,
 +                                  ((thread+0)*grid->ncx*grid->ncy)/nthread,
 +                                  ((thread+1)*grid->ncx*grid->ncy)/nthread,
 +                                  nbs->work[thread].sort_work);
 +        }
 +    }
 +
 +#ifdef NBNXN_SEARCH_BB_SSE
 +    if (grid->bSimple && nbat->XFormat == nbatX8)
 +    {
 +        combine_bounding_box_pairs(grid, grid->bb);
 +    }
 +#endif
 +
 +    if (!grid->bSimple)
 +    {
 +        grid->nsubc_tot = 0;
 +        for (i = 0; i < grid->nc; i++)
 +        {
 +            grid->nsubc_tot += grid->nsubc[i];
 +        }
 +    }
 +
 +    if (debug)
 +    {
 +        if (grid->bSimple)
 +        {
 +            print_bbsizes_simple(debug, nbs, grid);
 +        }
 +        else
 +        {
 +            fprintf(debug, "ns non-zero sub-cells: %d average atoms %.2f\n",
 +                    grid->nsubc_tot, (a1-a0)/(double)grid->nsubc_tot);
 +
 +            print_bbsizes_supersub(debug, nbs, grid);
 +        }
 +    }
 +}
 +
 +static void init_buffer_flags(nbnxn_buffer_flags_t *flags,
 +                              int                   natoms)
 +{
 +    int b;
 +
 +    flags->nflag = (natoms + NBNXN_BUFFERFLAG_SIZE - 1)/NBNXN_BUFFERFLAG_SIZE;
 +    if (flags->nflag > flags->flag_nalloc)
 +    {
 +        flags->flag_nalloc = over_alloc_large(flags->nflag);
 +        srenew(flags->flag, flags->flag_nalloc);
 +    }
 +    for (b = 0; b < flags->nflag; b++)
 +    {
 +        flags->flag[b] = 0;
 +    }
 +}
 +
 +/* Sets up a grid and puts the atoms on the grid.
 + * This function only operates on one domain of the domain decompostion.
 + * Note that without domain decomposition there is only one domain.
 + */
 +void nbnxn_put_on_grid(nbnxn_search_t nbs,
 +                       int ePBC, matrix box,
 +                       int dd_zone,
 +                       rvec corner0, rvec corner1,
 +                       int a0, int a1,
 +                       real atom_density,
 +                       const int *atinfo,
 +                       rvec *x,
 +                       int nmoved, int *move,
 +                       int nb_kernel_type,
 +                       nbnxn_atomdata_t *nbat)
 +{
 +    nbnxn_grid_t *grid;
 +    int           n;
 +    int           nc_max_grid, nc_max;
 +
 +    grid = &nbs->grid[dd_zone];
 +
 +    nbs_cycle_start(&nbs->cc[enbsCCgrid]);
 +
 +    grid->bSimple = nbnxn_kernel_pairlist_simple(nb_kernel_type);
 +
 +    grid->na_c      = nbnxn_kernel_to_ci_size(nb_kernel_type);
 +    grid->na_cj     = nbnxn_kernel_to_cj_size(nb_kernel_type);
 +    grid->na_sc     = (grid->bSimple ? 1 : GPU_NSUBCELL)*grid->na_c;
 +    grid->na_c_2log = get_2log(grid->na_c);
 +
 +    nbat->na_c = grid->na_c;
 +
 +    if (dd_zone == 0)
 +    {
 +        grid->cell0 = 0;
 +    }
 +    else
 +    {
 +        grid->cell0 =
 +            (nbs->grid[dd_zone-1].cell0 + nbs->grid[dd_zone-1].nc)*
 +            nbs->grid[dd_zone-1].na_sc/grid->na_sc;
 +    }
 +
 +    n = a1 - a0;
 +
 +    if (dd_zone == 0)
 +    {
 +        nbs->ePBC = ePBC;
 +        copy_mat(box, nbs->box);
 +
 +        if (atom_density >= 0)
 +        {
 +            grid->atom_density = atom_density;
 +        }
 +        else
 +        {
 +            grid->atom_density = grid_atom_density(n-nmoved, corner0, corner1);
 +        }
 +
 +        grid->cell0 = 0;
 +
 +        nbs->natoms_local    = a1 - nmoved;
 +        /* We assume that nbnxn_put_on_grid is called first
 +         * for the local atoms (dd_zone=0).
 +         */
 +        nbs->natoms_nonlocal = a1 - nmoved;
 +    }
 +    else
 +    {
 +        nbs->natoms_nonlocal = max(nbs->natoms_nonlocal, a1);
 +    }
 +
 +    nc_max_grid = set_grid_size_xy(nbs, grid,
 +                                   dd_zone, n-nmoved, corner0, corner1,
 +                                   nbs->grid[0].atom_density,
 +                                   nbat->XFormat);
 +
 +    nc_max = grid->cell0 + nc_max_grid;
 +
 +    if (a1 > nbs->cell_nalloc)
 +    {
 +        nbs->cell_nalloc = over_alloc_large(a1);
 +        srenew(nbs->cell, nbs->cell_nalloc);
 +    }
 +
 +    /* To avoid conditionals we store the moved particles at the end of a,
 +     * make sure we have enough space.
 +     */
 +    if (nc_max*grid->na_sc + nmoved > nbs->a_nalloc)
 +    {
 +        nbs->a_nalloc = over_alloc_large(nc_max*grid->na_sc + nmoved);
 +        srenew(nbs->a, nbs->a_nalloc);
 +    }
 +
 +    /* We need padding up to a multiple of the buffer flag size: simply add */
 +    if (nc_max*grid->na_sc + NBNXN_BUFFERFLAG_SIZE > nbat->nalloc)
 +    {
 +        nbnxn_atomdata_realloc(nbat, nc_max*grid->na_sc+NBNXN_BUFFERFLAG_SIZE);
 +    }
 +
 +    calc_cell_indices(nbs, dd_zone, grid, a0, a1, atinfo, x, move, nbat);
 +
 +    if (dd_zone == 0)
 +    {
 +        nbat->natoms_local = nbat->natoms;
 +    }
 +
 +    nbs_cycle_stop(&nbs->cc[enbsCCgrid]);
 +}
 +
 +/* Calls nbnxn_put_on_grid for all non-local domains */
 +void nbnxn_put_on_grid_nonlocal(nbnxn_search_t            nbs,
 +                                const gmx_domdec_zones_t *zones,
 +                                const int                *atinfo,
 +                                rvec                     *x,
 +                                int                       nb_kernel_type,
 +                                nbnxn_atomdata_t         *nbat)
 +{
 +    int  zone, d;
 +    rvec c0, c1;
 +
 +    for (zone = 1; zone < zones->n; zone++)
 +    {
 +        for (d = 0; d < DIM; d++)
 +        {
 +            c0[d] = zones->size[zone].bb_x0[d];
 +            c1[d] = zones->size[zone].bb_x1[d];
 +        }
 +
 +        nbnxn_put_on_grid(nbs, nbs->ePBC, NULL,
 +                          zone, c0, c1,
 +                          zones->cg_range[zone],
 +                          zones->cg_range[zone+1],
 +                          -1,
 +                          atinfo,
 +                          x,
 +                          0, NULL,
 +                          nb_kernel_type,
 +                          nbat);
 +    }
 +}
 +
 +/* Add simple grid type information to the local super/sub grid */
 +void nbnxn_grid_add_simple(nbnxn_search_t    nbs,
 +                           nbnxn_atomdata_t *nbat)
 +{
 +    nbnxn_grid_t *grid;
 +    float        *bbcz, *bb;
 +    int           ncd, sc;
 +
 +    grid = &nbs->grid[0];
 +
 +    if (grid->bSimple)
 +    {
 +        gmx_incons("nbnxn_grid_simple called with a simple grid");
 +    }
 +
 +    ncd = grid->na_sc/NBNXN_CPU_CLUSTER_I_SIZE;
 +
 +    if (grid->nc*ncd > grid->nc_nalloc_simple)
 +    {
 +        grid->nc_nalloc_simple = over_alloc_large(grid->nc*ncd);
 +        srenew(grid->bbcz_simple, grid->nc_nalloc_simple*NNBSBB_D);
 +        srenew(grid->bb_simple, grid->nc_nalloc_simple*NNBSBB_B);
 +        srenew(grid->flags_simple, grid->nc_nalloc_simple);
 +        if (nbat->XFormat)
 +        {
 +            sfree_aligned(grid->bbj);
 +            snew_aligned(grid->bbj, grid->nc_nalloc_simple/2, 16);
 +        }
 +    }
 +
 +    bbcz = grid->bbcz_simple;
 +    bb   = grid->bb_simple;
 +
 +#pragma omp parallel for num_threads(gmx_omp_nthreads_get(emntPairsearch)) schedule(static)
 +    for (sc = 0; sc < grid->nc; sc++)
 +    {
 +        int c, tx, na;
 +
 +        for (c = 0; c < ncd; c++)
 +        {
 +            tx = sc*ncd + c;
 +
 +            na = NBNXN_CPU_CLUSTER_I_SIZE;
 +            while (na > 0 &&
 +                   nbat->type[tx*NBNXN_CPU_CLUSTER_I_SIZE+na-1] == nbat->ntype-1)
 +            {
 +                na--;
 +            }
 +
 +            if (na > 0)
 +            {
 +                switch (nbat->XFormat)
 +                {
 +                    case nbatX4:
 +                        /* PACK_X4==NBNXN_CPU_CLUSTER_I_SIZE, so this is simple */
 +                        calc_bounding_box_x_x4(na, nbat->x+tx*STRIDE_P4,
 +                                               bb+tx*NNBSBB_B);
 +                        break;
 +                    case nbatX8:
 +                        /* PACK_X8>NBNXN_CPU_CLUSTER_I_SIZE, more complicated */
 +                        calc_bounding_box_x_x8(na, nbat->x+X8_IND_A(tx*NBNXN_CPU_CLUSTER_I_SIZE),
 +                                               bb+tx*NNBSBB_B);
 +                        break;
 +                    default:
 +                        calc_bounding_box(na, nbat->xstride,
 +                                          nbat->x+tx*NBNXN_CPU_CLUSTER_I_SIZE*nbat->xstride,
 +                                          bb+tx*NNBSBB_B);
 +                        break;
 +                }
 +                bbcz[tx*NNBSBB_D+0] = bb[tx*NNBSBB_B         +ZZ];
 +                bbcz[tx*NNBSBB_D+1] = bb[tx*NNBSBB_B+NNBSBB_C+ZZ];
 +
 +                /* No interaction optimization yet here */
 +                grid->flags_simple[tx] = NBNXN_CI_DO_LJ(0) | NBNXN_CI_DO_COUL(0);
 +            }
 +            else
 +            {
 +                grid->flags_simple[tx] = 0;
 +            }
 +        }
 +    }
 +
 +#ifdef NBNXN_SEARCH_BB_SSE
 +    if (grid->bSimple && nbat->XFormat == nbatX8)
 +    {
 +        combine_bounding_box_pairs(grid, grid->bb_simple);
 +    }
 +#endif
 +}
 +
 +void nbnxn_get_ncells(nbnxn_search_t nbs, int *ncx, int *ncy)
 +{
 +    *ncx = nbs->grid[0].ncx;
 +    *ncy = nbs->grid[0].ncy;
 +}
 +
 +void nbnxn_get_atomorder(nbnxn_search_t nbs, int **a, int *n)
 +{
 +    const nbnxn_grid_t *grid;
 +
 +    grid = &nbs->grid[0];
 +
 +    /* Return the atom order for the home cell (index 0) */
 +    *a  = nbs->a;
 +
 +    *n = grid->cxy_ind[grid->ncx*grid->ncy]*grid->na_sc;
 +}
 +
 +void nbnxn_set_atomorder(nbnxn_search_t nbs)
 +{
 +    nbnxn_grid_t *grid;
 +    int           ao, cx, cy, cxy, cz, j;
 +
 +    /* Set the atom order for the home cell (index 0) */
 +    grid = &nbs->grid[0];
 +
 +    ao = 0;
 +    for (cx = 0; cx < grid->ncx; cx++)
 +    {
 +        for (cy = 0; cy < grid->ncy; cy++)
 +        {
 +            cxy = cx*grid->ncy + cy;
 +            j   = grid->cxy_ind[cxy]*grid->na_sc;
 +            for (cz = 0; cz < grid->cxy_na[cxy]; cz++)
 +            {
 +                nbs->a[j]     = ao;
 +                nbs->cell[ao] = j;
 +                ao++;
 +                j++;
 +            }
 +        }
 +    }
 +}
 +
 +/* Determines the cell range along one dimension that
 + * the bounding box b0 - b1 sees.
 + */
 +static void get_cell_range(real b0, real b1,
 +                           int nc, real c0, real s, real invs,
 +                           real d2, real r2, int *cf, int *cl)
 +{
 +    *cf = max((int)((b0 - c0)*invs), 0);
 +
 +    while (*cf > 0 && d2 + sqr((b0 - c0) - (*cf-1+1)*s) < r2)
 +    {
 +        (*cf)--;
 +    }
 +
 +    *cl = min((int)((b1 - c0)*invs), nc-1);
 +    while (*cl < nc-1 && d2 + sqr((*cl+1)*s - (b1 - c0)) < r2)
 +    {
 +        (*cl)++;
 +    }
 +}
 +
 +/* Reference code calculating the distance^2 between two bounding boxes */
 +static float box_dist2(float bx0, float bx1, float by0,
 +                       float by1, float bz0, float bz1,
 +                       const float *bb)
 +{
 +    float d2;
 +    float dl, dh, dm, dm0;
 +
 +    d2 = 0;
 +
 +    dl  = bx0 - bb[BBU_X];
 +    dh  = bb[BBL_X] - bx1;
 +    dm  = max(dl, dh);
 +    dm0 = max(dm, 0);
 +    d2 += dm0*dm0;
 +
 +    dl  = by0 - bb[BBU_Y];
 +    dh  = bb[BBL_Y] - by1;
 +    dm  = max(dl, dh);
 +    dm0 = max(dm, 0);
 +    d2 += dm0*dm0;
 +
 +    dl  = bz0 - bb[BBU_Z];
 +    dh  = bb[BBL_Z] - bz1;
 +    dm  = max(dl, dh);
 +    dm0 = max(dm, 0);
 +    d2 += dm0*dm0;
 +
 +    return d2;
 +}
 +
 +/* Plain C code calculating the distance^2 between two bounding boxes */
 +static float subc_bb_dist2(int si, const float *bb_i_ci,
 +                           int csj, const float *bb_j_all)
 +{
 +    const float *bb_i, *bb_j;
 +    float        d2;
 +    float        dl, dh, dm, dm0;
 +
 +    bb_i = bb_i_ci  +  si*NNBSBB_B;
 +    bb_j = bb_j_all + csj*NNBSBB_B;
 +
 +    d2 = 0;
 +
 +    dl  = bb_i[BBL_X] - bb_j[BBU_X];
 +    dh  = bb_j[BBL_X] - bb_i[BBU_X];
 +    dm  = max(dl, dh);
 +    dm0 = max(dm, 0);
 +    d2 += dm0*dm0;
 +
 +    dl  = bb_i[BBL_Y] - bb_j[BBU_Y];
 +    dh  = bb_j[BBL_Y] - bb_i[BBU_Y];
 +    dm  = max(dl, dh);
 +    dm0 = max(dm, 0);
 +    d2 += dm0*dm0;
 +
 +    dl  = bb_i[BBL_Z] - bb_j[BBU_Z];
 +    dh  = bb_j[BBL_Z] - bb_i[BBU_Z];
 +    dm  = max(dl, dh);
 +    dm0 = max(dm, 0);
 +    d2 += dm0*dm0;
 +
 +    return d2;
 +}
 +
 +#ifdef NBNXN_SEARCH_BB_SSE
 +
 +/* SSE code for bb distance for bb format xyz0.
 + * Returns the distance^2 between bounding box si in bb_i_ci and
 + * bounding box csj in bb_j_all. Each box takes NNBSBB_B floats with
 + * the second corner stored NNBSBB_C floats after the first; both are
 + * read with aligned loads, so the arrays need 16-byte alignment.
 + * The argument na_c is not used in the body.
 + */
 +static float subc_bb_dist2_sse(int na_c,
 +                               int si, const float *bb_i_ci,
 +                               int csj, const float *bb_j_all)
 +{
 +    const float *bb_i, *bb_j;
 +
 +    __m128       bb_i_SSE0, bb_i_SSE1;
 +    __m128       bb_j_SSE0, bb_j_SSE1;
 +    __m128       dl_SSE;
 +    __m128       dh_SSE;
 +    __m128       dm_SSE;
 +    __m128       dm0_SSE;
 +    __m128       d2_SSE;
 +#ifndef GMX_X86_SSE4_1
 +    float        d2_array[7], *d2_align; /* 7 floats leave room for 4 aligned ones */
 +
 +    d2_align = (float *)(((size_t)(d2_array+3)) & (~((size_t)15))); /* 16-byte aligned address inside d2_array */
 +#else
 +    float d2;
 +#endif
 +
 +    bb_i = bb_i_ci  +  si*NNBSBB_B;
 +    bb_j = bb_j_all + csj*NNBSBB_B;
 +
 +    bb_i_SSE0 = _mm_load_ps(bb_i);
 +    bb_i_SSE1 = _mm_load_ps(bb_i+NNBSBB_C);
 +    bb_j_SSE0 = _mm_load_ps(bb_j);
 +    bb_j_SSE1 = _mm_load_ps(bb_j+NNBSBB_C);
 +
 +    dl_SSE    = _mm_sub_ps(bb_i_SSE0, bb_j_SSE1); /* first corner of i minus second corner of j */
 +    dh_SSE    = _mm_sub_ps(bb_j_SSE0, bb_i_SSE1); /* first corner of j minus second corner of i */
 +
 +    dm_SSE    = _mm_max_ps(dl_SSE, dh_SSE);
 +    dm0_SSE   = _mm_max_ps(dm_SSE, _mm_setzero_ps()); /* negative gaps (overlap) become zero */
 +#ifndef GMX_X86_SSE4_1
 +    d2_SSE    = _mm_mul_ps(dm0_SSE, dm0_SSE);
 +
 +    _mm_store_ps(d2_align, d2_SSE);
 +
 +    return d2_align[0] + d2_align[1] + d2_align[2]; /* the fourth element is not summed */
 +#else
 +    /* SSE4.1 dot product of components 0,1,2;
 +     * the result is placed in element 0 only.
 +     */
 +    d2_SSE    = _mm_dp_ps(dm0_SSE, dm0_SSE, 0x71);
 +
 +    _mm_store_ss(&d2, d2_SSE);
 +
 +    return d2;
 +#endif
 +}
 +
 +/* Calculate bb bounding distances of bb_i[si,...,si+3] and store them in d2.
 + * The macro declares only its own temporaries. It assigns to xi_l, yi_l,
 + * zi_l, xi_h, yi_h, zi_h and reads xj_l, yj_l, zj_l, xj_h, yj_h, zj_h and
 + * zero, all of which must exist as __m128 variables in the expanding scope,
 + * with the j values and zero set before the expansion.
 + * bb_i+si*NNBSBB_D*DIM and d2+si are accessed with aligned SSE loads and
 + * an aligned store, so both need 16-byte alignment.
 + */
 +#define SUBC_BB_DIST2_SSE_XXXX_INNER(si, bb_i, d2) \
 +    {                                                \
 +        int    shi;                                  \
 +                                                 \
 +        __m128 dx_0, dy_0, dz_0;                       \
 +        __m128 dx_1, dy_1, dz_1;                       \
 +                                                 \
 +        __m128 mx, my, mz;                             \
 +        __m128 m0x, m0y, m0z;                          \
 +                                                 \
 +        __m128 d2x, d2y, d2z;                          \
 +        __m128 d2s, d2t;                              \
 +                                                 \
 +        shi = si*NNBSBB_D*DIM;                       \
 +                                                 \
 +        xi_l = _mm_load_ps(bb_i+shi+0*STRIDE_PBB);   \
 +        yi_l = _mm_load_ps(bb_i+shi+1*STRIDE_PBB);   \
 +        zi_l = _mm_load_ps(bb_i+shi+2*STRIDE_PBB);   \
 +        xi_h = _mm_load_ps(bb_i+shi+3*STRIDE_PBB);   \
 +        yi_h = _mm_load_ps(bb_i+shi+4*STRIDE_PBB);   \
 +        zi_h = _mm_load_ps(bb_i+shi+5*STRIDE_PBB);   \
 +                                                 \
 +        dx_0 = _mm_sub_ps(xi_l, xj_h);                \
 +        dy_0 = _mm_sub_ps(yi_l, yj_h);                \
 +        dz_0 = _mm_sub_ps(zi_l, zj_h);                \
 +                                                 \
 +        dx_1 = _mm_sub_ps(xj_l, xi_h);                \
 +        dy_1 = _mm_sub_ps(yj_l, yi_h);                \
 +        dz_1 = _mm_sub_ps(zj_l, zi_h);                \
 +                                                 \
 +        mx   = _mm_max_ps(dx_0, dx_1);                \
 +        my   = _mm_max_ps(dy_0, dy_1);                \
 +        mz   = _mm_max_ps(dz_0, dz_1);                \
 +                                                 \
 +        m0x  = _mm_max_ps(mx, zero);                  \
 +        m0y  = _mm_max_ps(my, zero);                  \
 +        m0z  = _mm_max_ps(mz, zero);                  \
 +                                                 \
 +        d2x  = _mm_mul_ps(m0x, m0x);                  \
 +        d2y  = _mm_mul_ps(m0y, m0y);                  \
 +        d2z  = _mm_mul_ps(m0z, m0z);                  \
 +                                                 \
 +        d2s  = _mm_add_ps(d2x, d2y);                  \
 +        d2t  = _mm_add_ps(d2s, d2z);                  \
 +                                                 \
 +        _mm_store_ps(d2+si, d2t);                     \
 +    }
 +
 +/* SSE code for nsi bb distances for bb format xxxxyyyyzzzz.
 + * Computes the distance^2 between the single bounding box bb_j, whose six
 + * bounds are read at multiples of STRIDE_PBB, and the packed i bounding
 + * boxes in bb_i, and stores the results in d2.
 + * The first STRIDE_PBB distances are always computed; a second set is
 + * computed only when nsi is larger than STRIDE_PBB, so nsi values above
 + * 2*STRIDE_PBB are not covered.
 + */
 +static void subc_bb_dist2_sse_xxxx(const float *bb_j,
 +                                   int nsi, const float *bb_i,
 +                                   float *d2)
 +{
 +    __m128 xj_l, yj_l, zj_l;
 +    __m128 xj_h, yj_h, zj_h;
 +    __m128 xi_l, yi_l, zi_l;
 +    __m128 xi_h, yi_h, zi_h;
 +
 +    __m128 zero;
 +
 +    zero = _mm_setzero_ps();
 +
 +    xj_l = _mm_set1_ps(bb_j[0*STRIDE_PBB]); /* broadcast each j bound to all lanes */
 +    yj_l = _mm_set1_ps(bb_j[1*STRIDE_PBB]);
 +    zj_l = _mm_set1_ps(bb_j[2*STRIDE_PBB]);
 +    xj_h = _mm_set1_ps(bb_j[3*STRIDE_PBB]);
 +    yj_h = _mm_set1_ps(bb_j[4*STRIDE_PBB]);
 +    zj_h = _mm_set1_ps(bb_j[5*STRIDE_PBB]);
 +
 +    /* Here we "loop" over si (0,STRIDE_PBB) from 0 to nsi with step STRIDE_PBB.
 +     * But as we know the number of iterations is 1 or 2, we unroll manually.
 +     */
 +    SUBC_BB_DIST2_SSE_XXXX_INNER(0, bb_i, d2);
 +    if (STRIDE_PBB < nsi)
 +    {
 +        SUBC_BB_DIST2_SSE_XXXX_INNER(STRIDE_PBB, bb_i, d2);
 +    }
 +}
 +
 +#endif /* NBNXN_SEARCH_BB_SSE */
 +
 +/* Plain C function which determines if any atom pair between two cells
 + * is within distance sqrt(rl2).
 + */
 +static gmx_bool subc_in_range_x(int na_c,
 +                                int si, const real *x_i,
 +                                int csj, int stride, const real *x_j,
 +                                real rl2)
 +{
 +    int  i, j, i0, j0;
 +    real d2;
 +
 +    for (i = 0; i < na_c; i++)
 +    {
 +        i0 = (si*na_c + i)*DIM;
 +        for (j = 0; j < na_c; j++)
 +        {
 +            j0 = (csj*na_c + j)*stride;
 +
 +            d2 = sqr(x_i[i0  ] - x_j[j0  ]) +
 +                sqr(x_i[i0+1] - x_j[j0+1]) +
 +                sqr(x_i[i0+2] - x_j[j0+2]);
 +
 +            if (d2 < rl2)
 +            {
 +                return TRUE;
 +            }
 +        }
 +    }
 +
 +    return FALSE;
 +}
 +
 +/* SSE function which determines if any atom pair between two cells,
 + * both with 8 atoms, is within distance sqrt(rl2).
 + * The i-coordinates of cell si are read from x_i with aligned loads as
 + * six vectors of STRIDE_PBB floats (x, y, z for the first half of the
 + * atoms, then x, y, z for the second half); the j-coordinates of cell csj
 + * are read one atom at a time from x_j using stride reals per atom.
 + * Returns TRUE as soon as one pair is in range, FALSE otherwise.
 + * Without NBNXN_SEARCH_SSE_SINGLE this raises a consistency error
 + * through gmx_incons and returns TRUE.
 + */
 +static gmx_bool subc_in_range_sse8(int na_c,
 +                                   int si, const real *x_i,
 +                                   int csj, int stride, const real *x_j,
 +                                   real rl2)
 +{
 +#ifdef NBNXN_SEARCH_SSE_SINGLE
 +    __m128 ix_SSE0, iy_SSE0, iz_SSE0;
 +    __m128 ix_SSE1, iy_SSE1, iz_SSE1;
 +
 +    __m128 rc2_SSE;
 +
 +    int    na_c_sse;
 +    int    j0, j1;
 +
 +    rc2_SSE   = _mm_set1_ps(rl2);
 +
 +    na_c_sse = NBNXN_GPU_CLUSTER_SIZE/STRIDE_PBB; /* SSE vectors per coordinate per cluster */
 +    ix_SSE0  = _mm_load_ps(x_i+(si*na_c_sse*DIM+0)*STRIDE_PBB);
 +    iy_SSE0  = _mm_load_ps(x_i+(si*na_c_sse*DIM+1)*STRIDE_PBB);
 +    iz_SSE0  = _mm_load_ps(x_i+(si*na_c_sse*DIM+2)*STRIDE_PBB);
 +    ix_SSE1  = _mm_load_ps(x_i+(si*na_c_sse*DIM+3)*STRIDE_PBB);
 +    iy_SSE1  = _mm_load_ps(x_i+(si*na_c_sse*DIM+4)*STRIDE_PBB);
 +    iz_SSE1  = _mm_load_ps(x_i+(si*na_c_sse*DIM+5)*STRIDE_PBB);
 +
 +    /* We loop from the outer to the inner particles to maximize
 +     * the chance that we find a pair in range quickly and return.
 +     * Two j-atoms are handled per iteration, one from each end;
 +     * with an odd na_c the middle j-atom would never be tested.
 +     */
 +    j0 = csj*na_c;
 +    j1 = j0 + na_c - 1;
 +    while (j0 < j1)
 +    {
 +        __m128 jx0_SSE, jy0_SSE, jz0_SSE;
 +        __m128 jx1_SSE, jy1_SSE, jz1_SSE;
 +
 +        __m128 dx_SSE0, dy_SSE0, dz_SSE0;
 +        __m128 dx_SSE1, dy_SSE1, dz_SSE1;
 +        __m128 dx_SSE2, dy_SSE2, dz_SSE2;
 +        __m128 dx_SSE3, dy_SSE3, dz_SSE3;
 +
 +        __m128 rsq_SSE0;
 +        __m128 rsq_SSE1;
 +        __m128 rsq_SSE2;
 +        __m128 rsq_SSE3;
 +
 +        __m128 wco_SSE0;
 +        __m128 wco_SSE1;
 +        __m128 wco_SSE2;
 +        __m128 wco_SSE3;
 +        __m128 wco_any_SSE01, wco_any_SSE23, wco_any_SSE;
 +
 +        jx0_SSE = _mm_load1_ps(x_j+j0*stride+0); /* broadcast j-atom j0 to all lanes */
 +        jy0_SSE = _mm_load1_ps(x_j+j0*stride+1);
 +        jz0_SSE = _mm_load1_ps(x_j+j0*stride+2);
 +
 +        jx1_SSE = _mm_load1_ps(x_j+j1*stride+0); /* broadcast j-atom j1 to all lanes */
 +        jy1_SSE = _mm_load1_ps(x_j+j1*stride+1);
 +        jz1_SSE = _mm_load1_ps(x_j+j1*stride+2);
 +
 +        /* Calculate distance: sets 0 and 1 pair both i-halves with j0,
 +         * sets 2 and 3 pair both i-halves with j1.
 +         */
 +        dx_SSE0            = _mm_sub_ps(ix_SSE0, jx0_SSE);
 +        dy_SSE0            = _mm_sub_ps(iy_SSE0, jy0_SSE);
 +        dz_SSE0            = _mm_sub_ps(iz_SSE0, jz0_SSE);
 +        dx_SSE1            = _mm_sub_ps(ix_SSE1, jx0_SSE);
 +        dy_SSE1            = _mm_sub_ps(iy_SSE1, jy0_SSE);
 +        dz_SSE1            = _mm_sub_ps(iz_SSE1, jz0_SSE);
 +        dx_SSE2            = _mm_sub_ps(ix_SSE0, jx1_SSE);
 +        dy_SSE2            = _mm_sub_ps(iy_SSE0, jy1_SSE);
 +        dz_SSE2            = _mm_sub_ps(iz_SSE0, jz1_SSE);
 +        dx_SSE3            = _mm_sub_ps(ix_SSE1, jx1_SSE);
 +        dy_SSE3            = _mm_sub_ps(iy_SSE1, jy1_SSE);
 +        dz_SSE3            = _mm_sub_ps(iz_SSE1, jz1_SSE);
 +
 +        /* rsq = dx*dx+dy*dy+dz*dz */
 +        rsq_SSE0           = gmx_mm_calc_rsq_ps(dx_SSE0, dy_SSE0, dz_SSE0);
 +        rsq_SSE1           = gmx_mm_calc_rsq_ps(dx_SSE1, dy_SSE1, dz_SSE1);
 +        rsq_SSE2           = gmx_mm_calc_rsq_ps(dx_SSE2, dy_SSE2, dz_SSE2);
 +        rsq_SSE3           = gmx_mm_calc_rsq_ps(dx_SSE3, dy_SSE3, dz_SSE3);
 +
 +        wco_SSE0           = _mm_cmplt_ps(rsq_SSE0, rc2_SSE); /* lane mask: rsq < rl2 */
 +        wco_SSE1           = _mm_cmplt_ps(rsq_SSE1, rc2_SSE);
 +        wco_SSE2           = _mm_cmplt_ps(rsq_SSE2, rc2_SSE);
 +        wco_SSE3           = _mm_cmplt_ps(rsq_SSE3, rc2_SSE);
 +
 +        wco_any_SSE01      = _mm_or_ps(wco_SSE0, wco_SSE1);
 +        wco_any_SSE23      = _mm_or_ps(wco_SSE2, wco_SSE3);
 +        wco_any_SSE        = _mm_or_ps(wco_any_SSE01, wco_any_SSE23);
 +
 +        if (_mm_movemask_ps(wco_any_SSE)) /* non-zero when any lane is within range */
 +        {
 +            return TRUE;
 +        }
 +
 +        j0++;
 +        j1--;
 +    }
 +    return FALSE;
 +
 +#else
 +    /* No SSE */
 +    gmx_incons("SSE function called without SSE support");
 +
 +    return TRUE;
 +#endif
 +}
 +
 +/* Returns the j sub-cell for index cj_ind */
 +static int nbl_cj(const nbnxn_pairlist_t *nbl, int cj_ind)
 +{
 +    return nbl->cj4[cj_ind >> NBNXN_GPU_JGROUP_SIZE_2LOG].cj[cj_ind & (NBNXN_GPU_JGROUP_SIZE - 1)];
 +}
 +
 +/* Returns the i-interaction mask of the j sub-cell for index cj_ind */
 +static unsigned nbl_imask0(const nbnxn_pairlist_t *nbl, int cj_ind)
 +{
 +    return nbl->cj4[cj_ind >> NBNXN_GPU_JGROUP_SIZE_2LOG].imei[0].imask;
 +}
 +
 +/* Ensures there is enough space for extra extra exclusion masks */
 +static void check_excl_space(nbnxn_pairlist_t *nbl, int extra)
 +{
 +    if (nbl->nexcl+extra > nbl->excl_nalloc)
 +    {
 +        nbl->excl_nalloc = over_alloc_small(nbl->nexcl+extra);
 +        nbnxn_realloc_void((void **)&nbl->excl,
 +                           nbl->nexcl*sizeof(*nbl->excl),
 +                           nbl->excl_nalloc*sizeof(*nbl->excl),
 +                           nbl->alloc, nbl->free);
 +    }
 +}
 +
 +/* Ensures there is enough space for ncell extra j-cells in the list */
 +static void check_subcell_list_space_simple(nbnxn_pairlist_t *nbl,
 +                                            int               ncell)
 +{
 +    int cj_max;
 +
 +    cj_max = nbl->ncj + ncell;
 +
 +    if (cj_max > nbl->cj_nalloc)
 +    {
 +        nbl->cj_nalloc = over_alloc_small(cj_max);
 +        nbnxn_realloc_void((void **)&nbl->cj,
 +                           nbl->ncj*sizeof(*nbl->cj),
 +                           nbl->cj_nalloc*sizeof(*nbl->cj),
 +                           nbl->alloc, nbl->free);
 +    }
 +}
 +
 +/* Ensures there is enough space for ncell extra j-subcells in the list */
 +static void check_subcell_list_space_supersub(nbnxn_pairlist_t *nbl,
 +                                              int               nsupercell)
 +{
 +    int ncj4_max, j4, j, w, t;
 +
 +#define NWARP       2
 +#define WARP_SIZE  32
 +
 +    /* We can have maximally nsupercell*GPU_NSUBCELL sj lists */
 +    /* We can store 4 j-subcell - i-supercell pairs in one struct.
 +     * since we round down, we need one extra entry.
 +     */
 +    ncj4_max = ((nbl->work->cj_ind + nsupercell*GPU_NSUBCELL + NBNXN_GPU_JGROUP_SIZE - 1) >> NBNXN_GPU_JGROUP_SIZE_2LOG);
 +
 +    if (ncj4_max > nbl->cj4_nalloc)
 +    {
 +        nbl->cj4_nalloc = over_alloc_small(ncj4_max);
 +        nbnxn_realloc_void((void **)&nbl->cj4,
 +                           nbl->work->cj4_init*sizeof(*nbl->cj4),
 +                           nbl->cj4_nalloc*sizeof(*nbl->cj4),
 +                           nbl->alloc, nbl->free);
 +    }
 +
 +    if (ncj4_max > nbl->work->cj4_init)
 +    {
 +        for (j4 = nbl->work->cj4_init; j4 < ncj4_max; j4++)
 +        {
 +            /* No i-subcells and no excl's in the list initially */
 +            for (w = 0; w < NWARP; w++)
 +            {
 +                nbl->cj4[j4].imei[w].imask    = 0U;
 +                nbl->cj4[j4].imei[w].excl_ind = 0;
 +
 +            }
 +        }
 +        nbl->work->cj4_init = ncj4_max;
 +    }
 +}
 +
 +/* Set all excl masks for one GPU warp no exclusions */
 +static void set_no_excls(nbnxn_excl_t *excl)
 +{
 +    int t;
 +
 +    for (t = 0; t < WARP_SIZE; t++)
 +    {
 +        /* Turn all interaction bits on */
 +        excl->pair[t] = NBNXN_INT_MASK_ALL;
 +    }
 +}
 +
 +/* Initializes a single nbnxn_pairlist_t data structure */
 +static void nbnxn_init_pairlist(nbnxn_pairlist_t *nbl,
 +                                gmx_bool          bSimple,
 +                                nbnxn_alloc_t    *alloc,
 +                                nbnxn_free_t     *free)
 +{
 +    if (alloc == NULL)
 +    {
 +        nbl->alloc = nbnxn_alloc_aligned;
 +    }
 +    else
 +    {
 +        nbl->alloc = alloc;
 +    }
 +    if (free == NULL)
 +    {
 +        nbl->free = nbnxn_free_aligned;
 +    }
 +    else
 +    {
 +        nbl->free = free;
 +    }
 +
 +    nbl->bSimple     = bSimple;
 +    nbl->na_sc       = 0;
 +    nbl->na_ci       = 0;
 +    nbl->na_cj       = 0;
 +    nbl->nci         = 0;
 +    nbl->ci          = NULL;
 +    nbl->ci_nalloc   = 0;
 +    nbl->ncj         = 0;
 +    nbl->cj          = NULL;
 +    nbl->cj_nalloc   = 0;
 +    nbl->ncj4        = 0;
 +    /* We need one element extra in sj, so alloc initially with 1 */
 +    nbl->cj4_nalloc  = 0;
 +    nbl->cj4         = NULL;
 +    nbl->nci_tot     = 0;
 +
 +    if (!nbl->bSimple)
 +    {
 +        nbl->excl        = NULL;
 +        nbl->excl_nalloc = 0;
 +        nbl->nexcl       = 0;
 +        check_excl_space(nbl, 1);
 +        nbl->nexcl       = 1;
 +        set_no_excls(&nbl->excl[0]);
 +    }
 +
 +    snew(nbl->work, 1);
 +#ifdef NBNXN_BBXXXX
 +    snew_aligned(nbl->work->bb_ci, GPU_NSUBCELL/STRIDE_PBB*NNBSBB_XXXX, NBNXN_MEM_ALIGN);
 +#else
 +    snew_aligned(nbl->work->bb_ci, GPU_NSUBCELL*NNBSBB_B, NBNXN_MEM_ALIGN);
 +#endif
 +    snew_aligned(nbl->work->x_ci, NBNXN_NA_SC_MAX*DIM, NBNXN_MEM_ALIGN);
 +#ifdef GMX_NBNXN_SIMD
 +    snew_aligned(nbl->work->x_ci_simd_4xn, 1, NBNXN_MEM_ALIGN);
 +    snew_aligned(nbl->work->x_ci_simd_2xnn, 1, NBNXN_MEM_ALIGN);
 +#endif
 +    snew_aligned(nbl->work->d2, GPU_NSUBCELL, NBNXN_MEM_ALIGN);
 +
 +    nbl->work->sort            = NULL;
 +    nbl->work->sort_nalloc     = 0;
 +    nbl->work->sci_sort        = NULL;
 +    nbl->work->sci_sort_nalloc = 0;
 +}
 +
 +void nbnxn_init_pairlist_set(nbnxn_pairlist_set_t *nbl_list,
 +                             gmx_bool bSimple, gmx_bool bCombined,
 +                             nbnxn_alloc_t *alloc,
 +                             nbnxn_free_t  *free)
 +{
 +    int i;
 +
 +    nbl_list->bSimple   = bSimple;
 +    nbl_list->bCombined = bCombined;
 +
 +    nbl_list->nnbl = gmx_omp_nthreads_get(emntNonbonded);
 +
 +    if (!nbl_list->bCombined &&
 +        nbl_list->nnbl > NBNXN_BUFFERFLAG_MAX_THREADS)
 +    {
 +        gmx_fatal(FARGS, "%d OpenMP threads were requested. Since the non-bonded force buffer reduction is prohibitively slow with more than %d threads, we do not allow this. Use %d or less OpenMP threads.",
 +                  nbl_list->nnbl, NBNXN_BUFFERFLAG_MAX_THREADS, NBNXN_BUFFERFLAG_MAX_THREADS);
 +    }
 +
 +    snew(nbl_list->nbl, nbl_list->nnbl);
 +    /* Execute in order to avoid memory interleaving between threads */
 +#pragma omp parallel for num_threads(nbl_list->nnbl) schedule(static)
 +    for (i = 0; i < nbl_list->nnbl; i++)
 +    {
 +        /* Allocate the nblist data structure locally on each thread
 +         * to optimize memory access for NUMA architectures.
 +         */
 +        snew(nbl_list->nbl[i], 1);
 +
 +        /* Only list 0 is used on the GPU, use normal allocation for i>0 */
 +        if (i == 0)
 +        {
 +            nbnxn_init_pairlist(nbl_list->nbl[i], nbl_list->bSimple, alloc, free);
 +        }
 +        else
 +        {
 +            nbnxn_init_pairlist(nbl_list->nbl[i], nbl_list->bSimple, NULL, NULL);
 +        }
 +    }
 +}
 +
 +/* Print statistics of a pair list, used for debug output */
 +static void print_nblist_statistics_simple(FILE *fp, const nbnxn_pairlist_t *nbl,
 +                                           const nbnxn_search_t nbs, real rl)
 +{
 +    const nbnxn_grid_t *grid;
 +    int                 cs[SHIFTS];
 +    int                 s, i, j;
 +    int                 npexcl;
 +
 +    /* This code only produces correct statistics with domain decomposition */
 +    grid = &nbs->grid[0];
 +
 +    fprintf(fp, "nbl nci %d ncj %d\n",
 +            nbl->nci, nbl->ncj);
 +    fprintf(fp, "nbl na_sc %d rl %g ncp %d per cell %.1f atoms %.1f ratio %.2f\n",
 +            nbl->na_sc, rl, nbl->ncj, nbl->ncj/(double)grid->nc,
 +            nbl->ncj/(double)grid->nc*grid->na_sc,
 +            nbl->ncj/(double)grid->nc*grid->na_sc/(0.5*4.0/3.0*M_PI*rl*rl*rl*grid->nc*grid->na_sc/det(nbs->box)));
 +
 +    fprintf(fp, "nbl average j cell list length %.1f\n",
 +            0.25*nbl->ncj/(double)nbl->nci);
 +
 +    for (s = 0; s < SHIFTS; s++)
 +    {
 +        cs[s] = 0;
 +    }
 +    npexcl = 0;
 +    for (i = 0; i < nbl->nci; i++)
 +    {
 +        cs[nbl->ci[i].shift & NBNXN_CI_SHIFT] +=
 +            nbl->ci[i].cj_ind_end - nbl->ci[i].cj_ind_start;
 +
 +        j = nbl->ci[i].cj_ind_start;
 +        while (j < nbl->ci[i].cj_ind_end &&
 +               nbl->cj[j].excl != NBNXN_INT_MASK_ALL)
 +        {
 +            npexcl++;
 +            j++;
 +        }
 +    }
 +    fprintf(fp, "nbl cell pairs, total: %d excl: %d %.1f%%\n",
 +            nbl->ncj, npexcl, 100*npexcl/(double)nbl->ncj);
 +    for (s = 0; s < SHIFTS; s++)
 +    {
 +        if (cs[s] > 0)
 +        {
 +            fprintf(fp, "nbl shift %2d ncj %3d\n", s, cs[s]);
 +        }
 +    }
 +}
 +
 +/* Print statistics of a pair lists, used for debug output */
 +static void print_nblist_statistics_supersub(FILE *fp, const nbnxn_pairlist_t *nbl,
 +                                             const nbnxn_search_t nbs, real rl)
 +{
 +    const nbnxn_grid_t *grid;
 +    int                 i, j4, j, si, b;
 +    int                 c[GPU_NSUBCELL+1];
 +
 +    /* This code only produces correct statistics with domain decomposition */
 +    grid = &nbs->grid[0];
 +
 +    fprintf(fp, "nbl nsci %d ncj4 %d nsi %d excl4 %d\n",
 +            nbl->nsci, nbl->ncj4, nbl->nci_tot, nbl->nexcl);
 +    fprintf(fp, "nbl na_c %d rl %g ncp %d per cell %.1f atoms %.1f ratio %.2f\n",
 +            nbl->na_ci, rl, nbl->nci_tot, nbl->nci_tot/(double)grid->nsubc_tot,
 +            nbl->nci_tot/(double)grid->nsubc_tot*grid->na_c,
 +            nbl->nci_tot/(double)grid->nsubc_tot*grid->na_c/(0.5*4.0/3.0*M_PI*rl*rl*rl*grid->nsubc_tot*grid->na_c/det(nbs->box)));
 +
 +    fprintf(fp, "nbl average j super cell list length %.1f\n",
 +            0.25*nbl->ncj4/(double)nbl->nsci);
 +    fprintf(fp, "nbl average i sub cell list length %.1f\n",
 +            nbl->nci_tot/((double)nbl->ncj4));
 +
 +    for (si = 0; si <= GPU_NSUBCELL; si++)
 +    {
 +        c[si] = 0;
 +    }
 +    for (i = 0; i < nbl->nsci; i++)
 +    {
 +        for (j4 = nbl->sci[i].cj4_ind_start; j4 < nbl->sci[i].cj4_ind_end; j4++)
 +        {
 +            for (j = 0; j < NBNXN_GPU_JGROUP_SIZE; j++)
 +            {
 +                b = 0;
 +                for (si = 0; si < GPU_NSUBCELL; si++)
 +                {
 +                    if (nbl->cj4[j4].imei[0].imask & (1U << (j*GPU_NSUBCELL + si)))
 +                    {
 +                        b++;
 +                    }
 +                }
 +                c[b]++;
 +            }
 +        }
 +    }
 +    for (b = 0; b <= GPU_NSUBCELL; b++)
 +    {
 +        fprintf(fp, "nbl j-list #i-subcell %d %7d %4.1f\n",
 +                b, c[b], 100.0*c[b]/(double)(nbl->ncj4*NBNXN_GPU_JGROUP_SIZE));
 +    }
 +}
 +
 +/* Returns a pointer to the exclusion mask for cj4-unit cj4, warp warp */
 +static void low_get_nbl_exclusions(nbnxn_pairlist_t *nbl, int cj4,
 +                                   int warp, nbnxn_excl_t **excl)
 +{
 +    if (nbl->cj4[cj4].imei[warp].excl_ind == 0)
 +    {
 +        /* No exclusions set, make a new list entry */
 +        nbl->cj4[cj4].imei[warp].excl_ind = nbl->nexcl;
 +        nbl->nexcl++;
 +        *excl = &nbl->excl[nbl->cj4[cj4].imei[warp].excl_ind];
 +        set_no_excls(*excl);
 +    }
 +    else
 +    {
 +        /* We already have some exclusions, new ones can be added to the list */
 +        *excl = &nbl->excl[nbl->cj4[cj4].imei[warp].excl_ind];
 +    }
 +}
 +
 +/* Returns a pointer to the exclusion mask for cj4-unit cj4, warp warp,
 + * allocates extra memory, if necessary.
 + */
 +static void get_nbl_exclusions_1(nbnxn_pairlist_t *nbl, int cj4,
 +                                 int warp, nbnxn_excl_t **excl)
 +{
 +    if (nbl->cj4[cj4].imei[warp].excl_ind == 0)
 +    {
 +        /* We need to make a new list entry, check if we have space */
 +        check_excl_space(nbl, 1);
 +    }
 +    low_get_nbl_exclusions(nbl, cj4, warp, excl);
 +}
 +
 +/* Returns pointers to the exclusion mask for cj4-unit cj4 for both warps,
 + * allocates extra memory, if necessary.
 + */
 +static void get_nbl_exclusions_2(nbnxn_pairlist_t *nbl, int cj4,
 +                                 nbnxn_excl_t **excl_w0,
 +                                 nbnxn_excl_t **excl_w1)
 +{
 +    /* Check for space we might need */
 +    check_excl_space(nbl, 2);
 +
 +    low_get_nbl_exclusions(nbl, cj4, 0, excl_w0);
 +    low_get_nbl_exclusions(nbl, cj4, 1, excl_w1);
 +}
 +
 +/* Sets the self exclusions i=j and pair exclusions i>j */
 +static void set_self_and_newton_excls_supersub(nbnxn_pairlist_t *nbl,
 +                                               int cj4_ind, int sj_offset,
 +                                               int si)
 +{
 +    nbnxn_excl_t *excl[2];
 +    int           ei, ej, w;
 +
 +    /* Here we only set the set self and double pair exclusions */
 +
 +    get_nbl_exclusions_2(nbl, cj4_ind, &excl[0], &excl[1]);
 +
 +    /* Only minor < major bits set */
 +    for (ej = 0; ej < nbl->na_ci; ej++)
 +    {
 +        w = (ej>>2);
 +        for (ei = ej; ei < nbl->na_ci; ei++)
 +        {
 +            excl[w]->pair[(ej & (NBNXN_GPU_JGROUP_SIZE-1))*nbl->na_ci + ei] &=
 +                ~(1U << (sj_offset*GPU_NSUBCELL + si));
 +        }
 +    }
 +}
 +
 +/* Returns a diagonal or off-diagonal interaction mask for plain C lists */
 +static unsigned int get_imask(gmx_bool rdiag, int ci, int cj)
 +{
 +    return (rdiag && ci == cj ? NBNXN_INT_MASK_DIAG : NBNXN_INT_MASK_ALL);
 +}
 +
 +/* Returns a diagonal or off-diagonal interaction mask for SIMD128 lists */
 +static unsigned int get_imask_x86_simd128(gmx_bool rdiag, int ci, int cj)
 +{
 +#ifndef GMX_DOUBLE /* cj-size = 4 */
 +    return (rdiag && ci == cj ? NBNXN_INT_MASK_DIAG : NBNXN_INT_MASK_ALL);
 +#else              /* cj-size = 2 */
 +    return (rdiag && ci*2 == cj ? NBNXN_INT_MASK_DIAG_J2_0 :
 +            (rdiag && ci*2+1 == cj ? NBNXN_INT_MASK_DIAG_J2_1 :
 +             NBNXN_INT_MASK_ALL));
 +#endif
 +}
 +
 +/* Returns a diagonal or off-diagonal interaction mask for SIMD256 lists */
 +static unsigned int get_imask_x86_simd256(gmx_bool rdiag, int ci, int cj)
 +{
 +#ifndef GMX_DOUBLE /* cj-size = 8 */
 +    return (rdiag && ci == cj*2 ? NBNXN_INT_MASK_DIAG_J8_0 :
 +            (rdiag && ci == cj*2+1 ? NBNXN_INT_MASK_DIAG_J8_1 :
 +             NBNXN_INT_MASK_ALL));
 +#else              /* cj-size = 4 */
 +    return (rdiag && ci == cj ? NBNXN_INT_MASK_DIAG : NBNXN_INT_MASK_ALL);
 +#endif
 +}
 +
 +#ifdef GMX_NBNXN_SIMD /* Alias the 4xn/2xnn imask helpers to the width-specific functions */
 +#if GMX_NBNXN_SIMD_BITWIDTH == 128
 +#define get_imask_x86_simd_4xn  get_imask_x86_simd128
 +#else /* GMX_NBNXN_SIMD_BITWIDTH != 128; note that no 2xnn alias is defined for 128 */
 +#if GMX_NBNXN_SIMD_BITWIDTH == 256
 +#define get_imask_x86_simd_4xn  get_imask_x86_simd256
 +#define get_imask_x86_simd_2xnn get_imask_x86_simd128
 +#else /* neither 128 nor 256 */
 +#error "unsupported GMX_NBNXN_SIMD_BITWIDTH"
 +#endif /* GMX_NBNXN_SIMD_BITWIDTH == 256 */
 +#endif /* GMX_NBNXN_SIMD_BITWIDTH == 128 */
 +#endif /* GMX_NBNXN_SIMD */
 +
 +/* Plain C code for making a pair list of cell ci vs cell cjf-cjl.
 + * Checks bounding box distances and possibly atom pair distances.
 + */
 +static void make_cluster_list_simple(const nbnxn_grid_t *gridj,
 +                                     nbnxn_pairlist_t *nbl,
 +                                     int ci, int cjf, int cjl,
 +                                     gmx_bool remove_sub_diag,
 +                                     const real *x_j,
 +                                     real rl2, float rbb2,
 +                                     int *ndistc)
 +{
 +    const nbnxn_list_work_t *work;
 +
 +    const float             *bb_ci;
 +    const real              *x_ci;
 +
 +    gmx_bool                 InRange;
 +    real                     d2;
 +    int                      cjf_gl, cjl_gl, cj;
 +
 +    work = nbl->work;
 +
 +    bb_ci = nbl->work->bb_ci;
 +    x_ci  = nbl->work->x_ci;
 +
 +    InRange = FALSE;
 +    while (!InRange && cjf <= cjl)
 +    {
 +        d2       = subc_bb_dist2(0, bb_ci, cjf, gridj->bb);
 +        *ndistc += 2;
 +
 +        /* Check if the distance is within the distance where
 +         * we use only the bounding box distance rbb,
 +         * or within the cut-off and there is at least one atom pair
 +         * within the cut-off.
 +         */
 +        if (d2 < rbb2)
 +        {
 +            InRange = TRUE;
 +        }
 +        else if (d2 < rl2)
 +        {
 +            int i, j;
 +
 +            cjf_gl = gridj->cell0 + cjf;
 +            for (i = 0; i < NBNXN_CPU_CLUSTER_I_SIZE && !InRange; i++)
 +            {
 +                for (j = 0; j < NBNXN_CPU_CLUSTER_I_SIZE; j++)
 +                {
 +                    InRange = InRange ||
 +                        (sqr(x_ci[i*STRIDE_XYZ+XX] - x_j[(cjf_gl*NBNXN_CPU_CLUSTER_I_SIZE+j)*STRIDE_XYZ+XX]) +
 +                         sqr(x_ci[i*STRIDE_XYZ+YY] - x_j[(cjf_gl*NBNXN_CPU_CLUSTER_I_SIZE+j)*STRIDE_XYZ+YY]) +
 +                         sqr(x_ci[i*STRIDE_XYZ+ZZ] - x_j[(cjf_gl*NBNXN_CPU_CLUSTER_I_SIZE+j)*STRIDE_XYZ+ZZ]) < rl2);
 +                }
 +            }
 +            *ndistc += NBNXN_CPU_CLUSTER_I_SIZE*NBNXN_CPU_CLUSTER_I_SIZE;
 +        }
 +        if (!InRange)
 +        {
 +            cjf++;
 +        }
 +    }
 +    if (!InRange)
 +    {
 +        return;
 +    }
 +
 +    InRange = FALSE;
 +    while (!InRange && cjl > cjf)
 +    {
 +        d2       = subc_bb_dist2(0, bb_ci, cjl, gridj->bb);
 +        *ndistc += 2;
 +
 +        /* Check if the distance is within the distance where
 +         * we use only the bounding box distance rbb,
 +         * or within the cut-off and there is at least one atom pair
 +         * within the cut-off.
 +         */
 +        if (d2 < rbb2)
 +        {
 +            InRange = TRUE;
 +        }
 +        else if (d2 < rl2)
 +        {
 +            int i, j;
 +
 +            cjl_gl = gridj->cell0 + cjl;
 +            for (i = 0; i < NBNXN_CPU_CLUSTER_I_SIZE && !InRange; i++)
 +            {
 +                for (j = 0; j < NBNXN_CPU_CLUSTER_I_SIZE; j++)
 +                {
 +                    InRange = InRange ||
 +                        (sqr(x_ci[i*STRIDE_XYZ+XX] - x_j[(cjl_gl*NBNXN_CPU_CLUSTER_I_SIZE+j)*STRIDE_XYZ+XX]) +
 +                         sqr(x_ci[i*STRIDE_XYZ+YY] - x_j[(cjl_gl*NBNXN_CPU_CLUSTER_I_SIZE+j)*STRIDE_XYZ+YY]) +
 +                         sqr(x_ci[i*STRIDE_XYZ+ZZ] - x_j[(cjl_gl*NBNXN_CPU_CLUSTER_I_SIZE+j)*STRIDE_XYZ+ZZ]) < rl2);
 +                }
 +            }
 +            *ndistc += NBNXN_CPU_CLUSTER_I_SIZE*NBNXN_CPU_CLUSTER_I_SIZE;
 +        }
 +        if (!InRange)
 +        {
 +            cjl--;
 +        }
 +    }
 +
 +    if (cjf <= cjl)
 +    {
 +        for (cj = cjf; cj <= cjl; cj++)
 +        {
 +            /* Store cj and the interaction mask */
 +            nbl->cj[nbl->ncj].cj   = gridj->cell0 + cj;
 +            nbl->cj[nbl->ncj].excl = get_imask(remove_sub_diag, ci, cj);
 +            nbl->ncj++;
 +        }
 +        /* Increase the closing index in i super-cell list */
 +        nbl->ci[nbl->nci].cj_ind_end = nbl->ncj;
 +    }
 +}
 +
 +#ifdef GMX_NBNXN_SIMD_4XN
 +#include "nbnxn_search_simd_4xn.h"
 +#endif /* GMX_NBNXN_SIMD_4XN */
 +#ifdef GMX_NBNXN_SIMD_2XNN
 +#include "nbnxn_search_simd_2xnn.h"
 +#endif /* GMX_NBNXN_SIMD_2XNN */
 +
 +/* Plain C or SSE code for making a pair list of super-cell sci vs scj.
 + * Checks bounding box distances and possibly atom pair distances.
 + *
 + * For every j-subcell cjo of super-cell scj an interaction mask is built
 + * with one bit per i-subcell (bit cj_offset*GPU_NSUBCELL+ci) whose
 + * bounding box distance squared to the j-subcell is below rl2.
 + * When sci_equals_scj is set, only i-subcells ci <= cjo are considered.
 + * With PRUNE_LIST_CPU_ONE (defined in this function), a j-subcell with
 + * exactly one flagged i-subcell at bounding box distance >= rbb2 is
 + * dropped when no atom pair is within rl2.
 + * j-subcells with at least one remaining pair are stored: the mask is
 + * OR-ed into all NWARP masks of the cj4 entry, nbl->work->cj_ind is
 + * advanced, nbl->nci_tot is increased by the pair count and cj4_ind_end
 + * of the sci entry at index nbl->nsci is updated.
 + * *ndistc is increased by the number of distance checks accounted for.
 + */
 +static void make_cluster_list_supersub(const nbnxn_search_t nbs,
 +                                       const nbnxn_grid_t *gridi,
 +                                       const nbnxn_grid_t *gridj,
 +                                       nbnxn_pairlist_t *nbl,
 +                                       int sci, int scj,
 +                                       gmx_bool sci_equals_scj,
 +                                       int stride, const real *x,
 +                                       real rl2, float rbb2,
 +                                       int *ndistc)
 +{
 +    int          na_c;
 +    int          npair;
 +    int          cjo, ci1, ci, cj, cj_gl;
 +    int          cj4_ind, cj_offset;
 +    unsigned     imask;
 +    nbnxn_cj4_t *cj4;
 +    const float *bb_ci;
 +    const real  *x_ci;
 +    float       *d2l, d2;
 +    int          w;
 +#define PRUNE_LIST_CPU_ONE
 +#ifdef PRUNE_LIST_CPU_ONE
 +    int  ci_last = -1;
 +#endif
 +
 +    d2l = nbl->work->d2;
 +
 +    bb_ci = nbl->work->bb_ci;
 +    x_ci  = nbl->work->x_ci;
 +
 +    na_c = gridj->na_c;
 +
 +    for (cjo = 0; cjo < gridj->nsubc[scj]; cjo++)
 +    {
 +        cj4_ind   = (nbl->work->cj_ind >> NBNXN_GPU_JGROUP_SIZE_2LOG);
 +        cj_offset = nbl->work->cj_ind - cj4_ind*NBNXN_GPU_JGROUP_SIZE;
 +        cj4       = &nbl->cj4[cj4_ind];
 +
 +        cj = scj*GPU_NSUBCELL + cjo;
 +
 +        cj_gl = gridj->cell0*GPU_NSUBCELL + cj;
 +
 +        /* Initialize this j-subcell i-subcell list */
 +        cj4->cj[cj_offset] = cj_gl;
 +        imask              = 0;
 +
 +        if (sci_equals_scj)
 +        {
 +            ci1 = cjo + 1;
 +        }
 +        else
 +        {
 +            ci1 = gridi->nsubc[sci];
 +        }
 +
 +#ifdef NBNXN_BBXXXX
 +        /* Determine all ci1 bb distances in one call with SSE */
 +        subc_bb_dist2_sse_xxxx(gridj->bb+(cj>>STRIDE_PBB_2LOG)*NNBSBB_XXXX+(cj & (STRIDE_PBB-1)),
 +                               ci1, bb_ci, d2l);
 +        *ndistc += na_c*2;
 +#endif
 +
 +        npair = 0;
 +        /* We use a fixed upper-bound instead of ci1 to help optimization */
 +        for (ci = 0; ci < GPU_NSUBCELL; ci++)
 +        {
 +            if (ci == ci1)
 +            {
 +                break;
 +            }
 +
 +#ifndef NBNXN_BBXXXX
 +            /* Determine the bb distance between ci and cj */
 +            d2l[ci]  = subc_bb_dist2(ci, bb_ci, cj, gridj->bb);
 +            *ndistc += 2;
 +#endif
 +            d2 = d2l[ci];
 +
 +#ifdef PRUNE_LIST_CPU_ALL
 +            /* Check if the distance is within the distance where
 +             * we use only the bounding box distance rbb,
 +             * or within the cut-off and there is at least one atom pair
 +             * within the cut-off. This check is very costly.
 +             */
 +            *ndistc += na_c*na_c;
 +            if (d2 < rbb2 ||
 +                (d2 < rl2 &&
 +#ifdef NBNXN_PBB_SSE
 +                subc_in_range_sse8
 +#else
 +                subc_in_range_x
 +#endif
 +                    (na_c, ci, x_ci, cj_gl, stride, x, rl2)))
 +#else
 +            /* Check if the distance between the two bounding boxes
 +             * is within the pair-list cut-off.
 +             */
 +            if (d2 < rl2)
 +#endif
 +            {
 +                /* Flag this i-subcell to be taken into account */
 +                imask |= (1U << (cj_offset*GPU_NSUBCELL+ci));
 +
 +#ifdef PRUNE_LIST_CPU_ONE
 +                ci_last = ci;
 +#endif
 +
 +                npair++;
 +            }
 +        }
 +
 +#ifdef PRUNE_LIST_CPU_ONE
 +        /* If we only found 1 pair, check if any atoms are actually
 +         * within the cut-off, so we could get rid of it.
 +         * Pairs with a bounding box distance below rbb2 are always kept.
 +         */
 +        if (npair == 1 && d2l[ci_last] >= rbb2)
 +        {
 +            /* Avoid using function pointers here, as it's slower */
 +            if (
 +#ifdef NBNXN_PBB_SSE
 +                !subc_in_range_sse8
 +#else
 +                !subc_in_range_x
 +#endif
 +                    (na_c, ci_last, x_ci, cj_gl, stride, x, rl2))
 +            {
 +                imask &= ~(1U << (cj_offset*GPU_NSUBCELL+ci_last));
 +                npair--;
 +            }
 +        }
 +#endif
 +
 +        if (npair > 0)
 +        {
 +            /* We have a useful sj entry, close it now */
 +
 +            /* Set the exclusions for the ci == sj entry.
 +             * Here we don't bother to check if this entry is actually flagged,
 +             * as it will nearly always be in the list.
 +             */
 +            if (sci_equals_scj)
 +            {
 +                set_self_and_newton_excls_supersub(nbl, cj4_ind, cj_offset, cjo);
 +            }
 +
 +            /* Copy the cluster interaction mask to the list */
 +            for (w = 0; w < NWARP; w++)
 +            {
 +                cj4->imei[w].imask |= imask;
 +            }
 +
 +            nbl->work->cj_ind++;
 +
 +            /* Keep the count */
 +            nbl->nci_tot += npair;
 +
 +            /* Increase the closing index in i super-cell list,
 +             * rounding up to a whole cj4 group.
 +             */
 +            nbl->sci[nbl->nsci].cj4_ind_end =
 +                ((nbl->work->cj_ind+NBNXN_GPU_JGROUP_SIZE-1) >> NBNXN_GPU_JGROUP_SIZE_2LOG);
 +        }
 +    }
 +}
 +
 +/* Set all atom-pair exclusions from the topology stored in excl
 + * as masks in the pair-list for simple list i-entry nbl_ci
 + */
 +static void set_ci_top_excls(const nbnxn_search_t nbs,
 +                             nbnxn_pairlist_t    *nbl,
 +                             gmx_bool             diagRemoved,
 +                             int                  na_ci_2log,
 +                             int                  na_cj_2log,
 +                             const nbnxn_ci_t    *nbl_ci,
 +                             const t_blocka      *excl)
 +{
 +    const int    *cell;
 +    int           ci;
 +    int           cj_ind_first, cj_ind_last;
 +    int           cj_first, cj_last;
 +    int           ndirect;
 +    int           i, ai, aj, si, eind, ge, se;
 +    int           found, cj_ind_0, cj_ind_1, cj_ind_m;
 +    int           cj_m;
 +    gmx_bool      Found_si;
 +    int           si_ind;
 +    nbnxn_excl_t *nbl_excl;
 +    int           inner_i, inner_e;
 +
 +    cell = nbs->cell;
 +
 +    if (nbl_ci->cj_ind_end == nbl_ci->cj_ind_start)
 +    {
 +        /* Empty list */
 +        return;
 +    }
 +
 +    ci = nbl_ci->ci;
 +
 +    cj_ind_first = nbl_ci->cj_ind_start;
 +    cj_ind_last  = nbl->ncj - 1;
 +
 +    cj_first = nbl->cj[cj_ind_first].cj;
 +    cj_last  = nbl->cj[cj_ind_last].cj;
 +
 +    /* Determine how many contiguous j-cells we have starting
 +     * from the first i-cell. This number can be used to directly
 +     * calculate j-cell indices for excluded atoms.
 +     */
 +    ndirect = 0;
 +    if (na_ci_2log == na_cj_2log)
 +    {
 +        while (cj_ind_first + ndirect <= cj_ind_last &&
 +               nbl->cj[cj_ind_first+ndirect].cj == ci + ndirect)
 +        {
 +            ndirect++;
 +        }
 +    }
 +#ifdef NBNXN_SEARCH_BB_SSE
 +    else
 +    {
 +        while (cj_ind_first + ndirect <= cj_ind_last &&
 +               nbl->cj[cj_ind_first+ndirect].cj == ci_to_cj(na_cj_2log, ci) + ndirect)
 +        {
 +            ndirect++;
 +        }
 +    }
 +#endif
 +
 +    /* Loop over the atoms in the i super-cell */
 +    for (i = 0; i < nbl->na_sc; i++)
 +    {
 +        ai = nbs->a[ci*nbl->na_sc+i];
 +        if (ai >= 0)
 +        {
 +            si  = (i>>na_ci_2log);
 +
 +            /* Loop over the topology-based exclusions for this i-atom */
 +            for (eind = excl->index[ai]; eind < excl->index[ai+1]; eind++)
 +            {
 +                aj = excl->a[eind];
 +
 +                if (aj == ai)
 +                {
 +                    /* The self exclusion are already set, save some time */
 +                    continue;
 +                }
 +
 +                ge = cell[aj];
 +
 +                /* Without shifts we only calculate interactions j>i
 +                 * for one-way pair-lists.
 +                 */
 +                if (diagRemoved && ge <= ci*nbl->na_sc + i)
 +                {
 +                    continue;
 +                }
 +
 +                se = (ge >> na_cj_2log);
 +
 +                /* Could the cluster se be in our list? */
 +                if (se >= cj_first && se <= cj_last)
 +                {
 +                    if (se < cj_first + ndirect)
 +                    {
 +                        /* We can calculate cj_ind directly from se */
 +                        found = cj_ind_first + se - cj_first;
 +                    }
 +                    else
 +                    {
 +                        /* Search for se using bisection */
 +                        found    = -1;
 +                        cj_ind_0 = cj_ind_first + ndirect;
 +                        cj_ind_1 = cj_ind_last + 1;
 +                        while (found == -1 && cj_ind_0 < cj_ind_1)
 +                        {
 +                            cj_ind_m = (cj_ind_0 + cj_ind_1)>>1;
 +
 +                            cj_m = nbl->cj[cj_ind_m].cj;
 +
 +                            if (se == cj_m)
 +                            {
 +                                found = cj_ind_m;
 +                            }
 +                            else if (se < cj_m)
 +                            {
 +                                cj_ind_1 = cj_ind_m;
 +                            }
 +                            else
 +                            {
 +                                cj_ind_0 = cj_ind_m + 1;
 +                            }
 +                        }
 +                    }
 +
 +                    if (found >= 0)
 +                    {
 +                        inner_i = i  - (si << na_ci_2log);
 +                        inner_e = ge - (se << na_cj_2log);
 +
 +                        nbl->cj[found].excl &= ~(1U<<((inner_i<<na_cj_2log) + inner_e));
 +                    }
 +                }
 +            }
 +        }
 +    }
 +}
 +
 +/* Set all atom-pair exclusions from the topology stored in excl
 + * as masks in the pair-list for i-super-cell entry nbl_sci
 + */
 +static void set_sci_top_excls(const nbnxn_search_t nbs,
 +                              nbnxn_pairlist_t    *nbl,
 +                              gmx_bool             diagRemoved,
 +                              int                  na_c_2log,
 +                              const nbnxn_sci_t   *nbl_sci,
 +                              const t_blocka      *excl)
 +{
 +    const int    *cell;
 +    int           na_c;
 +    int           sci;
 +    int           cj_ind_first, cj_ind_last;
 +    int           cj_first, cj_last;
 +    int           ndirect;
 +    int           i, ai, aj, si, eind, ge, se;
 +    int           found, cj_ind_0, cj_ind_1, cj_ind_m;
 +    int           cj_m;
 +    gmx_bool      Found_si;
 +    int           si_ind;
 +    nbnxn_excl_t *nbl_excl;
 +    int           inner_i, inner_e, w;
 +
 +    cell = nbs->cell;
 +
 +    na_c = nbl->na_ci;
 +
 +    if (nbl_sci->cj4_ind_end == nbl_sci->cj4_ind_start)
 +    {
 +        /* Empty list */
 +        return;
 +    }
 +
 +    sci = nbl_sci->sci;
 +
 +    cj_ind_first = nbl_sci->cj4_ind_start*NBNXN_GPU_JGROUP_SIZE;
 +    cj_ind_last  = nbl->work->cj_ind - 1;
 +
 +    cj_first = nbl->cj4[nbl_sci->cj4_ind_start].cj[0];
 +    cj_last  = nbl_cj(nbl, cj_ind_last);
 +
 +    /* Determine how many contiguous j-clusters we have starting
 +     * from the first i-cluster. This number can be used to directly
 +     * calculate j-cluster indices for excluded atoms.
 +     */
 +    ndirect = 0;
 +    while (cj_ind_first + ndirect <= cj_ind_last &&
 +           nbl_cj(nbl, cj_ind_first+ndirect) == sci*GPU_NSUBCELL + ndirect)
 +    {
 +        ndirect++;
 +    }
 +
 +    /* Loop over the atoms in the i super-cell */
 +    for (i = 0; i < nbl->na_sc; i++)
 +    {
 +        ai = nbs->a[sci*nbl->na_sc+i];
 +        if (ai >= 0)
 +        {
 +            si  = (i>>na_c_2log);
 +
 +            /* Loop over the topology-based exclusions for this i-atom */
 +            for (eind = excl->index[ai]; eind < excl->index[ai+1]; eind++)
 +            {
 +                aj = excl->a[eind];
 +
 +                if (aj == ai)
 +                {
 +                    /* The self exclusion are already set, save some time */
 +                    continue;
 +                }
 +
 +                ge = cell[aj];
 +
 +                /* Without shifts we only calculate interactions j>i
 +                 * for one-way pair-lists.
 +                 */
 +                if (diagRemoved && ge <= sci*nbl->na_sc + i)
 +                {
 +                    continue;
 +                }
 +
 +                se = ge>>na_c_2log;
 +                /* Could the cluster se be in our list? */
 +                if (se >= cj_first && se <= cj_last)
 +                {
 +                    if (se < cj_first + ndirect)
 +                    {
 +                        /* We can calculate cj_ind directly from se */
 +                        found = cj_ind_first + se - cj_first;
 +                    }
 +                    else
 +                    {
 +                        /* Search for se using bisection */
 +                        found    = -1;
 +                        cj_ind_0 = cj_ind_first + ndirect;
 +                        cj_ind_1 = cj_ind_last + 1;
 +                        while (found == -1 && cj_ind_0 < cj_ind_1)
 +                        {
 +                            cj_ind_m = (cj_ind_0 + cj_ind_1)>>1;
 +
 +                            cj_m = nbl_cj(nbl, cj_ind_m);
 +
 +                            if (se == cj_m)
 +                            {
 +                                found = cj_ind_m;
 +                            }
 +                            else if (se < cj_m)
 +                            {
 +                                cj_ind_1 = cj_ind_m;
 +                            }
 +                            else
 +                            {
 +                                cj_ind_0 = cj_ind_m + 1;
 +                            }
 +                        }
 +                    }
 +
 +                    if (found >= 0)
 +                    {
 +                        inner_i = i  - si*na_c;
 +                        inner_e = ge - se*na_c;
 +
 +/* Macro for getting the index of atom a within a cluster */
 +#define AMODCJ4(a)  ((a) & (NBNXN_GPU_JGROUP_SIZE - 1))
 +/* Macro for converting an atom number to a cluster number */
 +#define A2CJ4(a)    ((a) >> NBNXN_GPU_JGROUP_SIZE_2LOG)
 +/* Macro for getting the index of an i-atom within a warp */
 +#define AMODWI(a)   ((a) & (NBNXN_GPU_CLUSTER_SIZE/2 - 1))
 +
 +                        if (nbl_imask0(nbl, found) & (1U << (AMODCJ4(found)*GPU_NSUBCELL + si)))
 +                        {
 +                            w       = (inner_e >> 2);
 +
 +                            get_nbl_exclusions_1(nbl, A2CJ4(found), w, &nbl_excl);
 +
 +                            nbl_excl->pair[AMODWI(inner_e)*nbl->na_ci+inner_i] &=
 +                                ~(1U << (AMODCJ4(found)*GPU_NSUBCELL + si));
 +                        }
 +
 +#undef AMODCJ4
 +#undef A2CJ4
 +#undef AMODWI
 +                    }
 +                }
 +            }
 +        }
 +    }
 +}
 +
 +/* Reallocate the simple ci list for at least n entries */
 +static void nb_realloc_ci(nbnxn_pairlist_t *nbl, int n)
 +{
 +    nbl->ci_nalloc = over_alloc_small(n);
 +    nbnxn_realloc_void((void **)&nbl->ci,
 +                       nbl->nci*sizeof(*nbl->ci),
 +                       nbl->ci_nalloc*sizeof(*nbl->ci),
 +                       nbl->alloc, nbl->free);
 +}
 +
 +/* Reallocate the super-cell sci list for at least n entries */
 +static void nb_realloc_sci(nbnxn_pairlist_t *nbl, int n)
 +{
 +    nbl->sci_nalloc = over_alloc_small(n);
 +    nbnxn_realloc_void((void **)&nbl->sci,
 +                       nbl->nsci*sizeof(*nbl->sci),
 +                       nbl->sci_nalloc*sizeof(*nbl->sci),
 +                       nbl->alloc, nbl->free);
 +}
 +
 +/* Make a new ci entry at index nbl->nci */
 +static void new_ci_entry(nbnxn_pairlist_t *nbl, int ci, int shift, int flags,
 +                         nbnxn_list_work_t *work)
 +{
 +    if (nbl->nci + 1 > nbl->ci_nalloc)
 +    {
 +        nb_realloc_ci(nbl, nbl->nci+1);
 +    }
 +    nbl->ci[nbl->nci].ci            = ci;
 +    nbl->ci[nbl->nci].shift         = shift;
 +    /* Store the interaction flags along with the shift */
 +    nbl->ci[nbl->nci].shift        |= flags;
 +    nbl->ci[nbl->nci].cj_ind_start  = nbl->ncj;
 +    nbl->ci[nbl->nci].cj_ind_end    = nbl->ncj;
 +}
 +
 +/* Make a new sci entry at index nbl->nsci */
 +static void new_sci_entry(nbnxn_pairlist_t *nbl, int sci, int shift, int flags,
 +                          nbnxn_list_work_t *work)
 +{
 +    if (nbl->nsci + 1 > nbl->sci_nalloc)
 +    {
 +        nb_realloc_sci(nbl, nbl->nsci+1);
 +    }
 +    nbl->sci[nbl->nsci].sci           = sci;
 +    nbl->sci[nbl->nsci].shift         = shift;
 +    nbl->sci[nbl->nsci].cj4_ind_start = nbl->ncj4;
 +    nbl->sci[nbl->nsci].cj4_ind_end   = nbl->ncj4;
 +}
 +
 +/* Sort the simple j-list cj on exclusions.
 + * Entries with exclusions will all be sorted to the beginning of the list.
 + */
 +static void sort_cj_excl(nbnxn_cj_t *cj, int ncj,
 +                         nbnxn_list_work_t *work)
 +{
 +    int jnew, j;
 +
 +    if (ncj > work->cj_nalloc)
 +    {
 +        work->cj_nalloc = over_alloc_large(ncj);
 +        srenew(work->cj, work->cj_nalloc);
 +    }
 +
 +    /* Make a list of the j-cells involving exclusions */
 +    jnew = 0;
 +    for (j = 0; j < ncj; j++)
 +    {
 +        if (cj[j].excl != NBNXN_INT_MASK_ALL)
 +        {
 +            work->cj[jnew++] = cj[j];
 +        }
 +    }
 +    /* Check if there are exclusions at all or not just the first entry */
 +    if (!((jnew == 0) ||
 +          (jnew == 1 && cj[0].excl != NBNXN_INT_MASK_ALL)))
 +    {
 +        for (j = 0; j < ncj; j++)
 +        {
 +            if (cj[j].excl == NBNXN_INT_MASK_ALL)
 +            {
 +                work->cj[jnew++] = cj[j];
 +            }
 +        }
 +        for (j = 0; j < ncj; j++)
 +        {
 +            cj[j] = work->cj[j];
 +        }
 +    }
 +}
 +
 +/* Close this simple list i entry */
 +static void close_ci_entry_simple(nbnxn_pairlist_t *nbl)
 +{
 +    int jlen;
 +
 +    /* All content of the new ci entry have already been filled correctly,
 +     * we only need to increase the count here (for non empty lists).
 +     */
 +    jlen = nbl->ci[nbl->nci].cj_ind_end - nbl->ci[nbl->nci].cj_ind_start;
 +    if (jlen > 0)
 +    {
 +        sort_cj_excl(nbl->cj+nbl->ci[nbl->nci].cj_ind_start, jlen, nbl->work);
 +
 +        /* The counts below are used for non-bonded pair/flop counts
 +         * and should therefore match the available kernel setups.
 +         */
 +        if (!(nbl->ci[nbl->nci].shift & NBNXN_CI_DO_COUL(0)))
 +        {
 +            nbl->work->ncj_noq += jlen;
 +        }
 +        else if ((nbl->ci[nbl->nci].shift & NBNXN_CI_HALF_LJ(0)) ||
 +                 !(nbl->ci[nbl->nci].shift & NBNXN_CI_DO_LJ(0)))
 +        {
 +            nbl->work->ncj_hlj += jlen;
 +        }
 +
 +        nbl->nci++;
 +    }
 +}
 +
 +/* Split sci entry for load balancing on the GPU.
 + * Splitting ensures we have enough lists to fully utilize the whole GPU.
 + * With progBal we generate progressively smaller lists, which improves
 + * load balancing. As we only know the current count on our own thread,
 + * we will need to estimate the current total amount of i-entries.
 + * As the lists get concatenated later, this estimate depends
 + * both on nthread and our own thread index.
 + *
 + * Operates on the last closed entry, nbl->sci[nbl->nsci-1].
 + * nsp_max_av is the target maximum number of cluster pairs per entry;
 + * with progBal it is scaled using nc_bal and the estimated entry count.
 + * On return nbl->nsci counts all entries the original entry was split into.
 + */
 +static void split_sci_entry(nbnxn_pairlist_t *nbl,
 +                            int nsp_max_av, gmx_bool progBal, int nc_bal,
 +                            int thread, int nthread)
 +{
 +    int nsci_est;
 +    int nsp_max;
 +    int cj4_start, cj4_end, j4len, cj4;
 +    int sci;
 +    int nsp, nsp_sci, nsp_cj4, nsp_cj4_e, nsp_cj4_p;
 +    int p;
 +
 +    if (progBal)
 +    {
 +        /* Estimate the total numbers of ci's of the nblist combined
 +         * over all threads using the target number of ci's.
 +         */
 +        nsci_est = nc_bal*thread/nthread + nbl->nsci;
 +
 +        /* The first ci blocks should be larger, to avoid overhead.
 +         * The last ci blocks should be smaller, to improve load balancing.
 +         */
 +        nsp_max = max(1,
 +                      nsp_max_av*nc_bal*3/(2*(nsci_est - 1 + nc_bal)));
 +    }
 +    else
 +    {
 +        nsp_max = nsp_max_av;
 +    }
 +
 +    cj4_start = nbl->sci[nbl->nsci-1].cj4_ind_start;
 +    cj4_end   = nbl->sci[nbl->nsci-1].cj4_ind_end;
 +    j4len     = cj4_end - cj4_start;
 +
 +    if (j4len > 1 && j4len*GPU_NSUBCELL*NBNXN_GPU_JGROUP_SIZE > nsp_max)
 +    {
 +        /* Remove the last ci entry and process the cj4's again */
 +        nbl->nsci -= 1;
 +
 +        sci        = nbl->nsci;
 +        nsp        = 0;
 +        nsp_sci    = 0;
 +        nsp_cj4_e  = 0;
 +        nsp_cj4    = 0;
 +        for (cj4 = cj4_start; cj4 < cj4_end; cj4++)
 +        {
 +            nsp_cj4_p = nsp_cj4;
 +            /* Count the number of cluster pairs in this cj4 group,
 +             * using the interaction mask of imei[0].
 +             */
 +            nsp_cj4   = 0;
 +            for (p = 0; p < GPU_NSUBCELL*NBNXN_GPU_JGROUP_SIZE; p++)
 +            {
 +                nsp_cj4 += (nbl->cj4[cj4].imei[0].imask >> p) & 1;
 +            }
 +
 +            if (nsp_cj4 > 0 && nsp + nsp_cj4 > nsp_max)
 +            {
 +                /* Split the list at cj4.
 +                 * NOTE(review): when nsp is still 0 here (the first cj4
 +                 * alone exceeds nsp_max), the entry closed below ends
 +                 * where it starts, i.e. it looks empty - confirm that
 +                 * consumers of the list tolerate such entries.
 +                 */
 +                nbl->sci[sci].cj4_ind_end = cj4;
 +                /* Create a new sci entry */
 +                sci++;
 +                nbl->nsci++;
 +                if (nbl->nsci+1 > nbl->sci_nalloc)
 +                {
 +                    nb_realloc_sci(nbl, nbl->nsci+1);
 +                }
 +                nbl->sci[sci].sci           = nbl->sci[nbl->nsci-1].sci;
 +                nbl->sci[sci].shift         = nbl->sci[nbl->nsci-1].shift;
 +                nbl->sci[sci].cj4_ind_start = cj4;
 +                nsp_sci                     = nsp;
 +                nsp_cj4_e                   = nsp_cj4_p;
 +                nsp                         = 0;
 +            }
 +            nsp += nsp_cj4;
 +        }
 +
 +        /* Put the remaining cj4's in the last sci entry */
 +        nbl->sci[sci].cj4_ind_end = cj4_end;
 +
 +        /* Possibly balance out the last two sci's
 +         * by moving the last cj4 of the second last sci.
 +         * nsp_sci and nsp_cj4_e hold the pair count of the second last
 +         * sci and of its last cj4, as recorded at the last split.
 +         */
 +        if (nsp_sci - nsp_cj4_e >= nsp + nsp_cj4_e)
 +        {
 +            nbl->sci[sci-1].cj4_ind_end--;
 +            nbl->sci[sci].cj4_ind_start--;
 +        }
 +
 +        nbl->nsci++;
 +    }
 +}
 +
 +/* Clost this super/sub list i entry */
 +static void close_ci_entry_supersub(nbnxn_pairlist_t *nbl,
 +                                    int nsp_max_av,
 +                                    gmx_bool progBal, int nc_bal,
 +                                    int thread, int nthread)
 +{
 +    int j4len, tlen;
 +    int nb, b;
 +
 +    /* All content of the new ci entry have already been filled correctly,
 +     * we only need to increase the count here (for non empty lists).
 +     */
 +    j4len = nbl->sci[nbl->nsci].cj4_ind_end - nbl->sci[nbl->nsci].cj4_ind_start;
 +    if (j4len > 0)
 +    {
 +        /* We can only have complete blocks of 4 j-entries in a list,
 +         * so round the count up before closing.
 +         */
 +        nbl->ncj4         = ((nbl->work->cj_ind + NBNXN_GPU_JGROUP_SIZE - 1) >> NBNXN_GPU_JGROUP_SIZE_2LOG);
 +        nbl->work->cj_ind = nbl->ncj4*NBNXN_GPU_JGROUP_SIZE;
 +
 +        nbl->nsci++;
 +
 +        if (nsp_max_av > 0)
 +        {
 +            /* Measure the size of the new entry and potentially split it */
 +            split_sci_entry(nbl, nsp_max_av, progBal, nc_bal, thread, nthread);
 +        }
 +    }
 +}
 +
 +/* Syncs the working array before adding another grid pair to the list */
 +static void sync_work(nbnxn_pairlist_t *nbl)
 +{
 +    if (!nbl->bSimple)
 +    {
 +        nbl->work->cj_ind   = nbl->ncj4*NBNXN_GPU_JGROUP_SIZE;
 +        nbl->work->cj4_init = nbl->ncj4;
 +    }
 +}
 +
 +/* Clears an nbnxn_pairlist_t data structure */
 +static void clear_pairlist(nbnxn_pairlist_t *nbl)
 +{
 +    nbl->nci           = 0;
 +    nbl->nsci          = 0;
 +    nbl->ncj           = 0;
 +    nbl->ncj4          = 0;
 +    nbl->nci_tot       = 0;
 +    nbl->nexcl         = 1;
 +
 +    nbl->work->ncj_noq = 0;
 +    nbl->work->ncj_hlj = 0;
 +}
 +
 +/* Sets a simple list i-cell bounding box, including PBC shift */
 +static void set_icell_bb_simple(const float *bb, int ci,
 +                                real shx, real shy, real shz,
 +                                float *bb_ci)
 +{
 +    int ia;
 +
 +    ia           = ci*NNBSBB_B;
 +    bb_ci[BBL_X] = bb[ia+BBL_X] + shx;
 +    bb_ci[BBL_Y] = bb[ia+BBL_Y] + shy;
 +    bb_ci[BBL_Z] = bb[ia+BBL_Z] + shz;
 +    bb_ci[BBU_X] = bb[ia+BBU_X] + shx;
 +    bb_ci[BBU_Y] = bb[ia+BBU_Y] + shy;
 +    bb_ci[BBU_Z] = bb[ia+BBU_Z] + shz;
 +}
 +
 +/* Sets a super-cell and sub cell bounding boxes, including PBC shift */
 +static void set_icell_bb_supersub(const float *bb, int ci,
 +                                  real shx, real shy, real shz,
 +                                  float *bb_ci)
 +{
 +    int ia, m, i;
 +
 +#ifdef NBNXN_BBXXXX
 +    ia = ci*(GPU_NSUBCELL>>STRIDE_PBB_2LOG)*NNBSBB_XXXX;
 +    for (m = 0; m < (GPU_NSUBCELL>>STRIDE_PBB_2LOG)*NNBSBB_XXXX; m += NNBSBB_XXXX)
 +    {
 +        for (i = 0; i < STRIDE_PBB; i++)
 +        {
 +            bb_ci[m+0*STRIDE_PBB+i] = bb[ia+m+0*STRIDE_PBB+i] + shx;
 +            bb_ci[m+1*STRIDE_PBB+i] = bb[ia+m+1*STRIDE_PBB+i] + shy;
 +            bb_ci[m+2*STRIDE_PBB+i] = bb[ia+m+2*STRIDE_PBB+i] + shz;
 +            bb_ci[m+3*STRIDE_PBB+i] = bb[ia+m+3*STRIDE_PBB+i] + shx;
 +            bb_ci[m+4*STRIDE_PBB+i] = bb[ia+m+4*STRIDE_PBB+i] + shy;
 +            bb_ci[m+5*STRIDE_PBB+i] = bb[ia+m+5*STRIDE_PBB+i] + shz;
 +        }
 +    }
 +#else
 +    ia = ci*GPU_NSUBCELL*NNBSBB_B;
 +    for (i = 0; i < GPU_NSUBCELL*NNBSBB_B; i += NNBSBB_B)
 +    {
 +        bb_ci[i+BBL_X] = bb[ia+i+BBL_X] + shx;
 +        bb_ci[i+BBL_Y] = bb[ia+i+BBL_Y] + shy;
 +        bb_ci[i+BBL_Z] = bb[ia+i+BBL_Z] + shz;
 +        bb_ci[i+BBU_X] = bb[ia+i+BBU_X] + shx;
 +        bb_ci[i+BBU_Y] = bb[ia+i+BBU_Y] + shy;
 +        bb_ci[i+BBU_Z] = bb[ia+i+BBU_Z] + shz;
 +    }
 +#endif
 +}
 +
 +/* Copies PBC shifted i-cell atom coordinates x,y,z to working array */
 +static void icell_set_x_simple(int ci,
 +                               real shx, real shy, real shz,
 +                               int na_c,
 +                               int stride, const real *x,
 +                               nbnxn_list_work_t *work)
 +{
 +    int  ia, i;
 +
 +    ia = ci*NBNXN_CPU_CLUSTER_I_SIZE;
 +
 +    for (i = 0; i < NBNXN_CPU_CLUSTER_I_SIZE; i++)
 +    {
 +        work->x_ci[i*STRIDE_XYZ+XX] = x[(ia+i)*stride+XX] + shx;
 +        work->x_ci[i*STRIDE_XYZ+YY] = x[(ia+i)*stride+YY] + shy;
 +        work->x_ci[i*STRIDE_XYZ+ZZ] = x[(ia+i)*stride+ZZ] + shz;
 +    }
 +}
 +
 +/* Copies PBC shifted super-cell atom coordinates x,y,z to working array */
 +static void icell_set_x_supersub(int ci,
 +                                 real shx, real shy, real shz,
 +                                 int na_c,
 +                                 int stride, const real *x,
 +                                 nbnxn_list_work_t *work)
 +{
 +    int  ia, i;
 +    real *x_ci;
 +
 +    x_ci = work->x_ci;
 +
 +    ia = ci*GPU_NSUBCELL*na_c;
 +    for (i = 0; i < GPU_NSUBCELL*na_c; i++)
 +    {
 +        x_ci[i*DIM + XX] = x[(ia+i)*stride + XX] + shx;
 +        x_ci[i*DIM + YY] = x[(ia+i)*stride + YY] + shy;
 +        x_ci[i*DIM + ZZ] = x[(ia+i)*stride + ZZ] + shz;
 +    }
 +}
 +
 +#ifdef NBNXN_SEARCH_BB_SSE
 +/* Copies PBC shifted super-cell packed atom coordinates to working array */
 +static void icell_set_x_supersub_sse8(int ci,
 +                                      real shx, real shy, real shz,
 +                                      int na_c,
 +                                      int stride, const real *x,
 +                                      nbnxn_list_work_t *work)
 +{
 +    int  si, io, ia, i, j;
 +    real *x_ci;
 +
 +    x_ci = work->x_ci;
 +
 +    for (si = 0; si < GPU_NSUBCELL; si++)
 +    {
 +        for (i = 0; i < na_c; i += STRIDE_PBB)
 +        {
 +            io = si*na_c + i;
 +            ia = ci*GPU_NSUBCELL*na_c + io;
 +            for (j = 0; j < STRIDE_PBB; j++)
 +            {
 +                x_ci[io*DIM + j + XX*STRIDE_PBB] = x[(ia+j)*stride+XX] + shx;
 +                x_ci[io*DIM + j + YY*STRIDE_PBB] = x[(ia+j)*stride+YY] + shy;
 +                x_ci[io*DIM + j + ZZ*STRIDE_PBB] = x[(ia+j)*stride+ZZ] + shz;
 +            }
 +        }
 +    }
 +}
 +#endif
 +
 +static real nbnxn_rlist_inc_nonloc_fac = 0.6;
 +
 +/* Due to the cluster size the effective pair-list is longer than
 + * that of a simple atom pair-list. This function gives the extra distance.
 + */
 +real nbnxn_get_rlist_effective_inc(int cluster_size, real atom_density)
 +{
 +    return ((0.5 + nbnxn_rlist_inc_nonloc_fac)*sqr(((cluster_size) - 1.0)/(cluster_size))*pow((cluster_size)/(atom_density), 1.0/3.0));
 +}
 +
 +/* Estimates the interaction volume^2 for non-local interactions */
 +static real nonlocal_vol2(const gmx_domdec_zones_t *zones, rvec ls, real r)
 +{
 +    int  z, d;
 +    real cl, ca, za;
 +    real vold_est;
 +    real vol2_est_tot;
 +
 +    vol2_est_tot = 0;
 +
 +    /* Here we simply add up the volumes of 1, 2 or 3 1D decomposition
 +     * not home interaction volume^2. As these volumes are not additive,
 +     * this is an overestimate, but it would only be significant in the limit
 +     * of small cells, where we anyhow need to split the lists into
 +     * as small parts as possible.
 +     */
 +
 +    for (z = 0; z < zones->n; z++)
 +    {
 +        if (zones->shift[z][XX] + zones->shift[z][YY] + zones->shift[z][ZZ] == 1)
 +        {
 +            cl = 0;
 +            ca = 1;
 +            za = 1;
 +            for (d = 0; d < DIM; d++)
 +            {
 +                if (zones->shift[z][d] == 0)
 +                {
 +                    cl += 0.5*ls[d];
 +                    ca *= ls[d];
 +                    za *= zones->size[z].x1[d] - zones->size[z].x0[d];
 +                }
 +            }
 +
 +            /* 4 octants of a sphere */
 +            vold_est  = 0.25*M_PI*r*r*r*r;
 +            /* 4 quarter pie slices on the edges */
 +            vold_est += 4*cl*M_PI/6.0*r*r*r;
 +            /* One rectangular volume on a face */
 +            vold_est += ca*0.5*r*r;
 +
 +            vol2_est_tot += vold_est*za;
 +        }
 +    }
 +
 +    return vol2_est_tot;
 +}
 +
 +/* Estimates the average size of a full j-list for super/sub setup */
 +static int get_nsubpair_max(const nbnxn_search_t nbs,
 +                            int                  iloc,
 +                            real                 rlist,
 +                            int                  min_ci_balanced)
 +{
 +    const nbnxn_grid_t *grid;
 +    rvec ls;
 +    real xy_diag2, r_eff_sup, vol_est, nsp_est, nsp_est_nl;
 +    int  nsubpair_max;
 +
 +    grid = &nbs->grid[0];
 +
 +    ls[XX] = (grid->c1[XX] - grid->c0[XX])/(grid->ncx*GPU_NSUBCELL_X);
 +    ls[YY] = (grid->c1[YY] - grid->c0[YY])/(grid->ncy*GPU_NSUBCELL_Y);
 +    ls[ZZ] = (grid->c1[ZZ] - grid->c0[ZZ])*grid->ncx*grid->ncy/(grid->nc*GPU_NSUBCELL_Z);
 +
 +    /* The average squared length of the diagonal of a sub cell */
 +    xy_diag2 = ls[XX]*ls[XX] + ls[YY]*ls[YY] + ls[ZZ]*ls[ZZ];
 +
 +    /* The formulas below are a heuristic estimate of the average nsj per si*/
 +    r_eff_sup = rlist + nbnxn_rlist_inc_nonloc_fac*sqr((grid->na_c - 1.0)/grid->na_c)*sqrt(xy_diag2/3);
 +
 +    if (!nbs->DomDec || nbs->zones->n == 1)
 +    {
 +        nsp_est_nl = 0;
 +    }
 +    else
 +    {
 +        nsp_est_nl =
 +            sqr(grid->atom_density/grid->na_c)*
 +            nonlocal_vol2(nbs->zones, ls, r_eff_sup);
 +    }
 +
 +    if (LOCAL_I(iloc))
 +    {
 +        /* Sub-cell interacts with itself */
 +        vol_est  = ls[XX]*ls[YY]*ls[ZZ];
 +        /* 6/2 rectangular volume on the faces */
 +        vol_est += (ls[XX]*ls[YY] + ls[XX]*ls[ZZ] + ls[YY]*ls[ZZ])*r_eff_sup;
 +        /* 12/2 quarter pie slices on the edges */
 +        vol_est += 2*(ls[XX] + ls[YY] + ls[ZZ])*0.25*M_PI*sqr(r_eff_sup);
 +        /* 4 octants of a sphere */
 +        vol_est += 0.5*4.0/3.0*M_PI*pow(r_eff_sup, 3);
 +
 +        nsp_est = grid->nsubc_tot*vol_est*grid->atom_density/grid->na_c;
 +
 +        /* Subtract the non-local pair count */
 +        nsp_est -= nsp_est_nl;
 +
 +        if (debug)
 +        {
 +            fprintf(debug, "nsp_est local %5.1f non-local %5.1f\n",
 +                    nsp_est, nsp_est_nl);
 +        }
 +    }
 +    else
 +    {
 +        nsp_est = nsp_est_nl;
 +    }
 +
 +    if (min_ci_balanced <= 0 || grid->nc >= min_ci_balanced || grid->nc == 0)
 +    {
 +        /* We don't need to worry */
 +        nsubpair_max = -1;
 +    }
 +    else
 +    {
 +        /* Thus the (average) maximum j-list size should be as follows */
 +        nsubpair_max = max(1, (int)(nsp_est/min_ci_balanced+0.5));
 +
 +        /* Since the target value is a maximum (this avoids high outliers,
 +         * which lead to load imbalance), not average, we add half the
 +         * number of pairs in a cj4 block to get the average about right.
 +         */
 +        nsubpair_max += GPU_NSUBCELL*NBNXN_GPU_JGROUP_SIZE/2;
 +    }
 +
 +    if (debug)
 +    {
 +        fprintf(debug, "nbl nsp estimate %.1f, nsubpair_max %d\n",
 +                nsp_est, nsubpair_max);
 +    }
 +
 +    return nsubpair_max;
 +}
 +
 +/* Debug list print function */
 +static void print_nblist_ci_cj(FILE *fp, const nbnxn_pairlist_t *nbl)
 +{
 +    int i, j;
 +
 +    for (i = 0; i < nbl->nci; i++)
 +    {
 +        fprintf(fp, "ci %4d  shift %2d  ncj %3d\n",
 +                nbl->ci[i].ci, nbl->ci[i].shift,
 +                nbl->ci[i].cj_ind_end - nbl->ci[i].cj_ind_start);
 +
 +        for (j = nbl->ci[i].cj_ind_start; j < nbl->ci[i].cj_ind_end; j++)
 +        {
 +            fprintf(fp, "  cj %5d  imask %x\n",
 +                    nbl->cj[j].cj,
 +                    nbl->cj[j].excl);
 +        }
 +    }
 +}
 +
 +/* Debug list print function */
 +static void print_nblist_sci_cj(FILE *fp, const nbnxn_pairlist_t *nbl)
 +{
 +    int i, j4, j, ncp, si;
 +
 +    for (i = 0; i < nbl->nsci; i++)
 +    {
 +        fprintf(fp, "ci %4d  shift %2d  ncj4 %2d\n",
 +                nbl->sci[i].sci, nbl->sci[i].shift,
 +                nbl->sci[i].cj4_ind_end - nbl->sci[i].cj4_ind_start);
 +
 +        ncp = 0;
 +        for (j4 = nbl->sci[i].cj4_ind_start; j4 < nbl->sci[i].cj4_ind_end; j4++)
 +        {
 +            for (j = 0; j < NBNXN_GPU_JGROUP_SIZE; j++)
 +            {
 +                fprintf(fp, "  sj %5d  imask %x\n",
 +                        nbl->cj4[j4].cj[j],
 +                        nbl->cj4[j4].imei[0].imask);
 +                for (si=0; si<GPU_NSUBCELL; si++)
 +                {
 +                    if (nbl->cj4[j4].imei[0].imask & (1U << (j*GPU_NSUBCELL + si)))
 +                    {
 +                        ncp++;
 +                    }
 +                }
 +            }
 +        }
 +        fprintf(fp, "ci %4d  shift %2d  ncj4 %2d ncp %3d\n",
 +                nbl->sci[i].sci, nbl->sci[i].shift,
 +                nbl->sci[i].cj4_ind_end - nbl->sci[i].cj4_ind_start,
 +                ncp);
 +    }
 +}
 +
 +/* Combine pair lists *nbl generated on multiple threads nblc */
 +static void combine_nblists(int nnbl, nbnxn_pairlist_t **nbl,
 +                            nbnxn_pairlist_t *nblc)
 +{
 +    int nsci, ncj4, nexcl;
 +    int n, i;
 +
 +    if (nblc->bSimple)
 +    {
 +        gmx_incons("combine_nblists does not support simple lists");
 +    }
 +
 +    nsci  = nblc->nsci;
 +    ncj4  = nblc->ncj4;
 +    nexcl = nblc->nexcl;
 +    for (i = 0; i < nnbl; i++)
 +    {
 +        nsci  += nbl[i]->nsci;
 +        ncj4  += nbl[i]->ncj4;
 +        nexcl += nbl[i]->nexcl;
 +    }
 +
 +    if (nsci > nblc->sci_nalloc)
 +    {
 +        nb_realloc_sci(nblc, nsci);
 +    }
 +    if (ncj4 > nblc->cj4_nalloc)
 +    {
 +        nblc->cj4_nalloc = over_alloc_small(ncj4);
 +        nbnxn_realloc_void((void **)&nblc->cj4,
 +                           nblc->ncj4*sizeof(*nblc->cj4),
 +                           nblc->cj4_nalloc*sizeof(*nblc->cj4),
 +                           nblc->alloc, nblc->free);
 +    }
 +    if (nexcl > nblc->excl_nalloc)
 +    {
 +        nblc->excl_nalloc = over_alloc_small(nexcl);
 +        nbnxn_realloc_void((void **)&nblc->excl,
 +                           nblc->nexcl*sizeof(*nblc->excl),
 +                           nblc->excl_nalloc*sizeof(*nblc->excl),
 +                           nblc->alloc, nblc->free);
 +    }
 +
 +    /* Each thread should copy its own data to the combined arrays,
 +     * as otherwise data will go back and forth between different caches.
 +     */
 +#pragma omp parallel for num_threads(gmx_omp_nthreads_get(emntPairsearch)) schedule(static)
 +    for (n = 0; n < nnbl; n++)
 +    {
 +        int sci_offset;
 +        int cj4_offset;
 +        int ci_offset;
 +        int excl_offset;
 +        int i, j4;
 +        const nbnxn_pairlist_t *nbli;
 +
 +        /* Determine the offset in the combined data for our thread */
 +        sci_offset  = nblc->nsci;
 +        cj4_offset  = nblc->ncj4;
 +        ci_offset   = nblc->nci_tot;
 +        excl_offset = nblc->nexcl;
 +
 +        for (i = 0; i < n; i++)
 +        {
 +            sci_offset  += nbl[i]->nsci;
 +            cj4_offset  += nbl[i]->ncj4;
 +            ci_offset   += nbl[i]->nci_tot;
 +            excl_offset += nbl[i]->nexcl;
 +        }
 +
 +        nbli = nbl[n];
 +
 +        for (i = 0; i < nbli->nsci; i++)
 +        {
 +            nblc->sci[sci_offset+i]                = nbli->sci[i];
 +            nblc->sci[sci_offset+i].cj4_ind_start += cj4_offset;
 +            nblc->sci[sci_offset+i].cj4_ind_end   += cj4_offset;
 +        }
 +
 +        for (j4 = 0; j4 < nbli->ncj4; j4++)
 +        {
 +            nblc->cj4[cj4_offset+j4]                   = nbli->cj4[j4];
 +            nblc->cj4[cj4_offset+j4].imei[0].excl_ind += excl_offset;
 +            nblc->cj4[cj4_offset+j4].imei[1].excl_ind += excl_offset;
 +        }
 +
 +        for (j4 = 0; j4 < nbli->nexcl; j4++)
 +        {
 +            nblc->excl[excl_offset+j4] = nbli->excl[j4];
 +        }
 +    }
 +
 +    for (n = 0; n < nnbl; n++)
 +    {
 +        nblc->nsci    += nbl[n]->nsci;
 +        nblc->ncj4    += nbl[n]->ncj4;
 +        nblc->nci_tot += nbl[n]->nci_tot;
 +        nblc->nexcl   += nbl[n]->nexcl;
 +    }
 +}
 +
 +/* Returns the next ci to be processes by our thread */
 +static gmx_bool next_ci(const nbnxn_grid_t *grid,
 +                        int conv,
 +                        int nth, int ci_block,
 +                        int *ci_x, int *ci_y,
 +                        int *ci_b, int *ci)
 +{
 +    (*ci_b)++;
 +    (*ci)++;
 +
 +    if (*ci_b == ci_block)
 +    {
 +        /* Jump to the next block assigned to this task */
 +        *ci   += (nth - 1)*ci_block;
 +        *ci_b  = 0;
 +    }
 +
 +    if (*ci >= grid->nc*conv)
 +    {
 +        return FALSE;
 +    }
 +
 +    while (*ci >= grid->cxy_ind[*ci_x*grid->ncy + *ci_y + 1]*conv)
 +    {
 +        *ci_y += 1;
 +        if (*ci_y == grid->ncy)
 +        {
 +            *ci_x += 1;
 +            *ci_y  = 0;
 +        }
 +    }
 +
 +    return TRUE;
 +}
 +
 +/* Returns the distance^2 for which we put cell pairs in the list
 + * without checking atom pair distances. This is usually < rlist^2.
 + */
 +static float boundingbox_only_distance2(const nbnxn_grid_t *gridi,
 +                                        const nbnxn_grid_t *gridj,
 +                                        real                rlist,
 +                                        gmx_bool            simple)
 +{
 +    /* If the distance between two sub-cell bounding boxes is less
 +     * than this distance, do not check the distance between
 +     * all particle pairs in the sub-cell, since then it is likely
 +     * that the box pair has atom pairs within the cut-off.
 +     * We use the nblist cut-off minus 0.5 times the average x/y diagonal
 +     * spacing of the sub-cells. Around 40% of the checked pairs are pruned.
 +     * Using more than 0.5 gains at most 0.5%.
 +     * If forces are calculated more than twice, the performance gain
 +     * in the force calculation outweighs the cost of checking.
 +     * Note that with subcell lists, the atom-pair distance check
 +     * is only performed when only 1 out of 8 sub-cells in within range,
 +     * this is because the GPU is much faster than the cpu.
 +     */
 +    real bbx, bby;
 +    real rbb2;
 +
 +    bbx = 0.5*(gridi->sx + gridj->sx);
 +    bby = 0.5*(gridi->sy + gridj->sy);
 +    if (!simple)
 +    {
 +        bbx /= GPU_NSUBCELL_X;
 +        bby /= GPU_NSUBCELL_Y;
 +    }
 +
 +    rbb2 = sqr(max(0, rlist - 0.5*sqrt(bbx*bbx + bby*bby)));
 +
 +#ifndef GMX_DOUBLE
 +    return rbb2;
 +#else
 +    return (float)((1+GMX_FLOAT_EPS)*rbb2);
 +#endif
 +}
 +
 +static int get_ci_block_size(const nbnxn_grid_t *gridi,
 +                             gmx_bool bDomDec, int nth)
 +{
 +    const int ci_block_enum      = 5;
 +    const int ci_block_denom     = 11;
 +    const int ci_block_min_atoms = 16;
 +    int ci_block;
 +
 +    /* Here we decide how to distribute the blocks over the threads.
 +     * We use prime numbers to try to avoid that the grid size becomes
 +     * a multiple of the number of threads, which would lead to some
 +     * threads getting "inner" pairs and others getting boundary pairs,
 +     * which in turns will lead to load imbalance between threads.
 +     * Set the block size as 5/11/ntask times the average number of cells
 +     * in a y,z slab. This should ensure a quite uniform distribution
 +     * of the grid parts of the different thread along all three grid
 +     * zone boundaries with 3D domain decomposition. At the same time
 +     * the blocks will not become too small.
 +     */
 +    ci_block = (gridi->nc*ci_block_enum)/(ci_block_denom*gridi->ncx*nth);
 +
 +    /* Ensure the blocks are not too small: avoids cache invalidation */
 +    if (ci_block*gridi->na_sc < ci_block_min_atoms)
 +    {
 +        ci_block = (ci_block_min_atoms + gridi->na_sc - 1)/gridi->na_sc;
 +    }
 +
 +    /* Without domain decomposition
 +     * or with less than 3 blocks per task, divide in nth blocks.
 +     */
 +    if (!bDomDec || ci_block*3*nth > gridi->nc)
 +    {
 +        ci_block = (gridi->nc + nth - 1)/nth;
 +    }
 +
 +    return ci_block;
 +}
 +
 +/* Generates the part of pair-list nbl assigned to our thread */
 +static void nbnxn_make_pairlist_part(const nbnxn_search_t nbs,
 +                                     const nbnxn_grid_t *gridi,
 +                                     const nbnxn_grid_t *gridj,
 +                                     nbnxn_search_work_t *work,
 +                                     const nbnxn_atomdata_t *nbat,
 +                                     const t_blocka *excl,
 +                                     real rlist,
 +                                     int nb_kernel_type,
 +                                     int ci_block,
 +                                     gmx_bool bFBufferFlag,
 +                                     int nsubpair_max,
 +                                     gmx_bool progBal,
 +                                     int min_ci_balanced,
 +                                     int th, int nth,
 +                                     nbnxn_pairlist_t *nbl)
 +{
 +    int  na_cj_2log;
 +    matrix box;
 +    real rl2;
 +    float rbb2;
 +    int  d;
 +    int  ci_b, ci, ci_x, ci_y, ci_xy, cj;
 +    ivec shp;
 +    int  tx, ty, tz;
 +    int  shift;
 +    gmx_bool bMakeList;
 +    real shx, shy, shz;
 +    int  conv_i, cell0_i;
 +    const float *bb_i, *bbcz_i, *bbcz_j;
 +    const int *flags_i;
 +    real bx0, bx1, by0, by1, bz0, bz1;
 +    real bz1_frac;
 +    real d2cx, d2z, d2z_cx, d2z_cy, d2zx, d2zxy, d2xy;
 +    int  cxf, cxl, cyf, cyf_x, cyl;
 +    int  cx, cy;
 +    int  c0, c1, cs, cf, cl;
 +    int  ndistc;
 +    int  ncpcheck;
 +    int  gridi_flag_shift = 0, gridj_flag_shift = 0;
 +    unsigned *gridj_flag  = NULL;
 +    int  ncj_old_i, ncj_old_j;
 +
 +    nbs_cycle_start(&work->cc[enbsCCsearch]);
 +
 +    if (gridj->bSimple != nbl->bSimple)
 +    {
 +        gmx_incons("Grid incompatible with pair-list");
 +    }
 +
 +    sync_work(nbl);
 +    nbl->na_sc = gridj->na_sc;
 +    nbl->na_ci = gridj->na_c;
 +    nbl->na_cj = nbnxn_kernel_to_cj_size(nb_kernel_type);
 +    na_cj_2log = get_2log(nbl->na_cj);
 +
 +    nbl->rlist  = rlist;
 +
 +    if (bFBufferFlag)
 +    {
 +        /* Determine conversion of clusters to flag blocks */
 +        gridi_flag_shift = 0;
 +        while ((nbl->na_ci<<gridi_flag_shift) < NBNXN_BUFFERFLAG_SIZE)
 +        {
 +            gridi_flag_shift++;
 +        }
 +        gridj_flag_shift = 0;
 +        while ((nbl->na_cj<<gridj_flag_shift) < NBNXN_BUFFERFLAG_SIZE)
 +        {
 +            gridj_flag_shift++;
 +        }
 +
 +        gridj_flag = work->buffer_flags.flag;
 +    }
 +
 +    copy_mat(nbs->box, box);
 +
 +    rl2 = nbl->rlist*nbl->rlist;
 +
 +    rbb2 = boundingbox_only_distance2(gridi, gridj, nbl->rlist, nbl->bSimple);
 +
 +    if (debug)
 +    {
 +        fprintf(debug, "nbl bounding box only distance %f\n", sqrt(rbb2));
 +    }
 +
 +    /* Set the shift range */
 +    for (d = 0; d < DIM; d++)
 +    {
 +        /* Check if we need periodicity shifts.
 +         * Without PBC or with domain decomposition we don't need them.
 +         */
 +        if (d >= ePBC2npbcdim(nbs->ePBC) || nbs->dd_dim[d])
 +        {
 +            shp[d] = 0;
 +        }
 +        else
 +        {
 +            if (d == XX &&
 +                box[XX][XX] - fabs(box[YY][XX]) - fabs(box[ZZ][XX]) < sqrt(rl2))
 +            {
 +                shp[d] = 2;
 +            }
 +            else
 +            {
 +                shp[d] = 1;
 +            }
 +        }
 +    }
 +
 +    if (nbl->bSimple && !gridi->bSimple)
 +    {
 +        conv_i  = gridi->na_sc/gridj->na_sc;
 +        bb_i    = gridi->bb_simple;
 +        bbcz_i  = gridi->bbcz_simple;
 +        flags_i = gridi->flags_simple;
 +    }
 +    else
 +    {
 +        conv_i  = 1;
 +        bb_i    = gridi->bb;
 +        bbcz_i  = gridi->bbcz;
 +        flags_i = gridi->flags;
 +    }
 +    cell0_i = gridi->cell0*conv_i;
 +
 +    bbcz_j = gridj->bbcz;
 +
 +    if (conv_i != 1)
 +    {
 +        /* Blocks of the conversion factor - 1 give a large repeat count
 +         * combined with a small block size. This should result in good
 +         * load balancing for both small and large domains.
 +         */
 +        ci_block = conv_i - 1;
 +    }
 +    if (debug)
 +    {
 +        fprintf(debug, "nbl nc_i %d col.av. %.1f ci_block %d\n",
 +                gridi->nc, gridi->nc/(double)(gridi->ncx*gridi->ncy), ci_block);
 +    }
 +
 +    ndistc   = 0;
 +    ncpcheck = 0;
 +
 +    /* Initialize ci_b and ci to one before where we want them to start,
 +     * as they will both be incremented in next_ci.
 +     */
 +    ci_b = -1;
 +    ci   = th*ci_block - 1;
 +    ci_x = 0;
 +    ci_y = 0;
 +    while (next_ci(gridi, conv_i, nth, ci_block, &ci_x, &ci_y, &ci_b, &ci))
 +    {
 +        if (nbl->bSimple && flags_i[ci] == 0)
 +        {
 +            continue;
 +        }
 +
 +        ncj_old_i = nbl->ncj;
 +
 +        d2cx = 0;
 +        if (gridj != gridi && shp[XX] == 0)
 +        {
 +            if (nbl->bSimple)
 +            {
 +                bx1 = bb_i[ci*NNBSBB_B+NNBSBB_C+XX];
 +            }
 +            else
 +            {
 +                bx1 = gridi->c0[XX] + (ci_x+1)*gridi->sx;
 +            }
 +            if (bx1 < gridj->c0[XX])
 +            {
 +                d2cx = sqr(gridj->c0[XX] - bx1);
 +
 +                if (d2cx >= rl2)
 +                {
 +                    continue;
 +                }
 +            }
 +        }
 +
 +        ci_xy = ci_x*gridi->ncy + ci_y;
 +
 +        /* Loop over shift vectors in three dimensions */
 +        for (tz = -shp[ZZ]; tz <= shp[ZZ]; tz++)
 +        {
 +            shz = tz*box[ZZ][ZZ];
 +
 +            bz0 = bbcz_i[ci*NNBSBB_D  ] + shz;
 +            bz1 = bbcz_i[ci*NNBSBB_D+1] + shz;
 +
 +            if (tz == 0)
 +            {
 +                d2z = 0;
 +            }
 +            else if (tz < 0)
 +            {
 +                d2z = sqr(bz1);
 +            }
 +            else
 +            {
 +                d2z = sqr(bz0 - box[ZZ][ZZ]);
 +            }
 +
 +            d2z_cx = d2z + d2cx;
 +
 +            if (d2z_cx >= rl2)
 +            {
 +                continue;
 +            }
 +
 +            bz1_frac =
 +                bz1/((real)(gridi->cxy_ind[ci_xy+1] - gridi->cxy_ind[ci_xy]));
 +            if (bz1_frac < 0)
 +            {
 +                bz1_frac = 0;
 +            }
 +            /* The check with bz1_frac close to or larger than 1 comes later */
 +
 +            for (ty = -shp[YY]; ty <= shp[YY]; ty++)
 +            {
 +                shy = ty*box[YY][YY] + tz*box[ZZ][YY];
 +
 +                if (nbl->bSimple)
 +                {
 +                    by0 = bb_i[ci*NNBSBB_B         +YY] + shy;
 +                    by1 = bb_i[ci*NNBSBB_B+NNBSBB_C+YY] + shy;
 +                }
 +                else
 +                {
 +                    by0 = gridi->c0[YY] + (ci_y  )*gridi->sy + shy;
 +                    by1 = gridi->c0[YY] + (ci_y+1)*gridi->sy + shy;
 +                }
 +
 +                get_cell_range(by0, by1,
 +                               gridj->ncy, gridj->c0[YY], gridj->sy, gridj->inv_sy,
 +                               d2z_cx, rl2,
 +                               &cyf, &cyl);
 +
 +                if (cyf > cyl)
 +                {
 +                    continue;
 +                }
 +
 +                d2z_cy = d2z;
 +                if (by1 < gridj->c0[YY])
 +                {
 +                    d2z_cy += sqr(gridj->c0[YY] - by1);
 +                }
 +                else if (by0 > gridj->c1[YY])
 +                {
 +                    d2z_cy += sqr(by0 - gridj->c1[YY]);
 +                }
 +
 +                for (tx = -shp[XX]; tx <= shp[XX]; tx++)
 +                {
 +                    shift = XYZ2IS(tx, ty, tz);
 +
 +#ifdef NBNXN_SHIFT_BACKWARD
 +                    if (gridi == gridj && shift > CENTRAL)
 +                    {
 +                        continue;
 +                    }
 +#endif
 +
 +                    shx = tx*box[XX][XX] + ty*box[YY][XX] + tz*box[ZZ][XX];
 +
 +                    if (nbl->bSimple)
 +                    {
 +                        bx0 = bb_i[ci*NNBSBB_B         +XX] + shx;
 +                        bx1 = bb_i[ci*NNBSBB_B+NNBSBB_C+XX] + shx;
 +                    }
 +                    else
 +                    {
 +                        bx0 = gridi->c0[XX] + (ci_x  )*gridi->sx + shx;
 +                        bx1 = gridi->c0[XX] + (ci_x+1)*gridi->sx + shx;
 +                    }
 +
 +                    get_cell_range(bx0, bx1,
 +                                   gridj->ncx, gridj->c0[XX], gridj->sx, gridj->inv_sx,
 +                                   d2z_cy, rl2,
 +                                   &cxf, &cxl);
 +
 +                    if (cxf > cxl)
 +                    {
 +                        continue;
 +                    }
 +
 +                    if (nbl->bSimple)
 +                    {
 +                        new_ci_entry(nbl, cell0_i+ci, shift, flags_i[ci],
 +                                     nbl->work);
 +                    }
 +                    else
 +                    {
 +                        new_sci_entry(nbl, cell0_i+ci, shift, flags_i[ci],
 +                                      nbl->work);
 +                    }
 +
 +#ifndef NBNXN_SHIFT_BACKWARD
 +                    if (cxf < ci_x)
 +#else
 +                    if (shift == CENTRAL && gridi == gridj &&
 +                        cxf < ci_x)
 +#endif
 +                    {
 +                        /* Leave the pairs with i > j.
 +                         * x is the major index, so skip half of it.
 +                         */
 +                        cxf = ci_x;
 +                    }
 +
 +                    if (nbl->bSimple)
 +                    {
 +                        set_icell_bb_simple(bb_i, ci, shx, shy, shz,
 +                                            nbl->work->bb_ci);
 +                    }
 +                    else
 +                    {
 +                        set_icell_bb_supersub(bb_i, ci, shx, shy, shz,
 +                                              nbl->work->bb_ci);
 +                    }
 +
 +                    nbs->icell_set_x(cell0_i+ci, shx, shy, shz,
 +                                     gridi->na_c, nbat->xstride, nbat->x,
 +                                     nbl->work);
 +
 +                    for (cx = cxf; cx <= cxl; cx++)
 +                    {
 +                        d2zx = d2z;
 +                        if (gridj->c0[XX] + cx*gridj->sx > bx1)
 +                        {
 +                            d2zx += sqr(gridj->c0[XX] + cx*gridj->sx - bx1);
 +                        }
 +                        else if (gridj->c0[XX] + (cx+1)*gridj->sx < bx0)
 +                        {
 +                            d2zx += sqr(gridj->c0[XX] + (cx+1)*gridj->sx - bx0);
 +                        }
 +
 +#ifndef NBNXN_SHIFT_BACKWARD
 +                        if (gridi == gridj &&
 +                            cx == 0 && cyf < ci_y)
 +#else
 +                        if (gridi == gridj &&
 +                            cx == 0 && shift == CENTRAL && cyf < ci_y)
 +#endif
 +                        {
 +                            /* Leave the pairs with i > j.
 +                             * Skip half of y when i and j have the same x.
 +                             */
 +                            cyf_x = ci_y;
 +                        }
 +                        else
 +                        {
 +                            cyf_x = cyf;
 +                        }
 +
 +                        for (cy = cyf_x; cy <= cyl; cy++)
 +                        {
 +                            c0 = gridj->cxy_ind[cx*gridj->ncy+cy];
 +                            c1 = gridj->cxy_ind[cx*gridj->ncy+cy+1];
 +#ifdef NBNXN_SHIFT_BACKWARD
 +                            if (gridi == gridj &&
 +                                shift == CENTRAL && c0 < ci)
 +                            {
 +                                c0 = ci;
 +                            }
 +#endif
 +
 +                            d2zxy = d2zx;
 +                            if (gridj->c0[YY] + cy*gridj->sy > by1)
 +                            {
 +                                d2zxy += sqr(gridj->c0[YY] + cy*gridj->sy - by1);
 +                            }
 +                            else if (gridj->c0[YY] + (cy+1)*gridj->sy < by0)
 +                            {
 +                                d2zxy += sqr(gridj->c0[YY] + (cy+1)*gridj->sy - by0);
 +                            }
 +                            if (c1 > c0 && d2zxy < rl2)
 +                            {
 +                                cs = c0 + (int)(bz1_frac*(c1 - c0));
 +                                if (cs >= c1)
 +                                {
 +                                    cs = c1 - 1;
 +                                }
 +
 +                                d2xy = d2zxy - d2z;
 +
 +                                /* Find the lowest cell that can possibly
 +                                 * be within range.
 +                                 */
 +                                cf = cs;
 +                                while (cf > c0 &&
 +                                       (bbcz_j[cf*NNBSBB_D+1] >= bz0 ||
 +                                        d2xy + sqr(bbcz_j[cf*NNBSBB_D+1] - bz0) < rl2))
 +                                {
 +                                    cf--;
 +                                }
 +
 +                                /* Find the highest cell that can possibly
 +                                 * be within range.
 +                                 */
 +                                cl = cs;
 +                                while (cl < c1-1 &&
 +                                       (bbcz_j[cl*NNBSBB_D] <= bz1 ||
 +                                        d2xy + sqr(bbcz_j[cl*NNBSBB_D] - bz1) < rl2))
 +                                {
 +                                    cl++;
 +                                }
 +
 +#ifdef NBNXN_REFCODE
 +                                {
 +                                    /* Simple reference code, for debugging,
 +                                     * overrides the more complex code above.
 +                                     */
 +                                    int k;
 +                                    cf = c1;
 +                                    cl = -1;
 +                                    for (k = c0; k < c1; k++)
 +                                    {
 +                                        if (box_dist2(bx0, bx1, by0, by1, bz0, bz1,
 +                                                      bb+k*NNBSBB_B) < rl2 &&
 +                                            k < cf)
 +                                        {
 +                                            cf = k;
 +                                        }
 +                                        if (box_dist2(bx0, bx1, by0, by1, bz0, bz1,
 +                                                      bb+k*NNBSBB_B) < rl2 &&
 +                                            k > cl)
 +                                        {
 +                                            cl = k;
 +                                        }
 +                                    }
 +                                }
 +#endif
 +
 +                                if (gridi == gridj)
 +                                {
 +                                    /* We want each atom/cell pair only once,
 +                                     * only use cj >= ci.
 +                                     */
 +#ifndef NBNXN_SHIFT_BACKWARD
 +                                    cf = max(cf, ci);
 +#else
 +                                    if (shift == CENTRAL)
 +                                    {
 +                                        cf = max(cf, ci);
 +                                    }
 +#endif
 +                                }
 +
 +                                if (cf <= cl)
 +                                {
 +                                    /* For f buffer flags with simple lists */
 +                                    ncj_old_j = nbl->ncj;
 +
 +                                    switch (nb_kernel_type)
 +                                    {
 +                                        case nbnxnk4x4_PlainC:
 +                                            check_subcell_list_space_simple(nbl, cl-cf+1);
 +
 +                                            make_cluster_list_simple(gridj,
 +                                                                     nbl, ci, cf, cl,
 +                                                                     (gridi == gridj && shift == CENTRAL),
 +                                                                     nbat->x,
 +                                                                     rl2, rbb2,
 +                                                                     &ndistc);
 +                                            break;
 +#ifdef GMX_NBNXN_SIMD_4XN
 +                                        case nbnxnk4xN_SIMD_4xN:
 +                                            check_subcell_list_space_simple(nbl, ci_to_cj(na_cj_2log, cl-cf)+2);
 +                                            make_cluster_list_simd_4xn(gridj,
 +                                                                       nbl, ci, cf, cl,
 +                                                                       (gridi == gridj && shift == CENTRAL),
 +                                                                       nbat->x,
 +                                                                       rl2, rbb2,
 +                                                                       &ndistc);
 +                                            break;
 +#endif
 +#ifdef GMX_NBNXN_SIMD_2XNN
 +                                        case nbnxnk4xN_SIMD_2xNN:
 +                                            check_subcell_list_space_simple(nbl, ci_to_cj(na_cj_2log, cl-cf)+2);
 +                                            make_cluster_list_simd_2xnn(gridj,
 +                                                                        nbl, ci, cf, cl,
 +                                                                        (gridi == gridj && shift == CENTRAL),
 +                                                                        nbat->x,
 +                                                                        rl2, rbb2,
 +                                                                        &ndistc);
 +                                            break;
 +#endif
 +                                        case nbnxnk8x8x8_PlainC:
 +                                        case nbnxnk8x8x8_CUDA:
 +                                            check_subcell_list_space_supersub(nbl, cl-cf+1);
 +                                            for (cj = cf; cj <= cl; cj++)
 +                                            {
 +                                                make_cluster_list_supersub(nbs, gridi, gridj,
 +                                                                           nbl, ci, cj,
 +                                                                           (gridi == gridj && shift == CENTRAL && ci == cj),
 +                                                                           nbat->xstride, nbat->x,
 +                                                                           rl2, rbb2,
 +                                                                           &ndistc);
 +                                            }
 +                                            break;
 +                                    }
 +                                    ncpcheck += cl - cf + 1;
 +
 +                                    if (bFBufferFlag && nbl->ncj > ncj_old_j)
 +                                    {
 +                                        int cbf, cbl, cb;
 +
 +                                        cbf = nbl->cj[ncj_old_j].cj >> gridj_flag_shift;
 +                                        cbl = nbl->cj[nbl->ncj-1].cj >> gridj_flag_shift;
 +                                        for (cb = cbf; cb <= cbl; cb++)
 +                                        {
 +                                            gridj_flag[cb] = 1U<<th;
 +                                        }
 +                                    }
 +                                }
 +                            }
 +                        }
 +                    }
 +
 +                    /* Set the exclusions for this ci list */
 +                    if (nbl->bSimple)
 +                    {
 +                        set_ci_top_excls(nbs,
 +                                         nbl,
 +                                         shift == CENTRAL && gridi == gridj,
 +                                         gridj->na_c_2log,
 +                                         na_cj_2log,
 +                                         &(nbl->ci[nbl->nci]),
 +                                         excl);
 +                    }
 +                    else
 +                    {
 +                        set_sci_top_excls(nbs,
 +                                          nbl,
 +                                          shift == CENTRAL && gridi == gridj,
 +                                          gridj->na_c_2log,
 +                                          &(nbl->sci[nbl->nsci]),
 +                                          excl);
 +                    }
 +
 +                    /* Close this ci list */
 +                    if (nbl->bSimple)
 +                    {
 +                        close_ci_entry_simple(nbl);
 +                    }
 +                    else
 +                    {
 +                        close_ci_entry_supersub(nbl,
 +                                                nsubpair_max,
 +                                                progBal, min_ci_balanced,
 +                                                th, nth);
 +                    }
 +                }
 +            }
 +        }
 +
 +        if (bFBufferFlag && nbl->ncj > ncj_old_i)
 +        {
 +            work->buffer_flags.flag[(gridi->cell0+ci)>>gridi_flag_shift] = 1U<<th;
 +        }
 +    }
 +
 +    work->ndistc = ndistc;
 +
 +    nbs_cycle_stop(&work->cc[enbsCCsearch]);
 +
 +    if (debug)
 +    {
 +        fprintf(debug, "number of distance checks %d\n", ndistc);
 +        fprintf(debug, "ncpcheck %s %d\n", gridi == gridj ? "local" : "non-local",
 +                ncpcheck);
 +
 +        if (nbl->bSimple)
 +        {
 +            print_nblist_statistics_simple(debug, nbl, nbs, rlist);
 +        }
 +        else
 +        {
 +            print_nblist_statistics_supersub(debug, nbl, nbs, rlist);
 +        }
 +
 +    }
 +}
 +
 +static void reduce_buffer_flags(const nbnxn_search_t        nbs,
 +                                int                         nsrc,
 +                                const nbnxn_buffer_flags_t *dest)
 +{
 +    int s, b;
 +    const unsigned *flag;
 +
 +    for (s = 0; s < nsrc; s++)
 +    {
 +        flag = nbs->work[s].buffer_flags.flag;
 +
 +        for (b = 0; b < dest->nflag; b++)
 +        {
 +            dest->flag[b] |= flag[b];
 +        }
 +    }
 +}
 +
 +static void print_reduction_cost(const nbnxn_buffer_flags_t *flags, int nout)
 +{
 +    int nelem, nkeep, ncopy, nred, b, c, out;
 +
 +    nelem = 0;
 +    nkeep = 0;
 +    ncopy = 0;
 +    nred  = 0;
 +    for (b = 0; b < flags->nflag; b++)
 +    {
 +        if (flags->flag[b] == 1)
 +        {
 +            /* Only flag 0 is set, no copy of reduction required */
 +            nelem++;
 +            nkeep++;
 +        }
 +        else if (flags->flag[b] > 0)
 +        {
 +            c = 0;
 +            for (out = 0; out < nout; out++)
 +            {
 +                if (flags->flag[b] & (1U<<out))
 +                {
 +                    c++;
 +                }
 +            }
 +            nelem += c;
 +            if (c == 1)
 +            {
 +                ncopy++;
 +            }
 +            else
 +            {
 +                nred += c;
 +            }
 +        }
 +    }
 +
 +    fprintf(debug, "nbnxn reduction: #flag %d #list %d elem %4.2f, keep %4.2f copy %4.2f red %4.2f\n",
 +            flags->nflag, nout,
 +            nelem/(double)(flags->nflag),
 +            nkeep/(double)(flags->nflag),
 +            ncopy/(double)(flags->nflag),
 +            nred/(double)(flags->nflag));
 +}
 +
 +/* Perform a count (linear) sort to sort the smaller lists to the end.
 + * This avoids load imbalance on the GPU, as large lists will be
 + * scheduled and executed first and the smaller lists later.
 + * Load balancing between multi-processors only happens at the end
 + * and there smaller lists lead to more effective load balancing.
 + * The sorting is done on the cj4 count, not on the actual pair counts.
 + * Not only does this make the sort faster, but it also results in
 + * better load balancing than using a list sorted on exact load.
 + * This function swaps the pointer in the pair list to avoid a copy operation.
 + */
 +static void sort_sci(nbnxn_pairlist_t *nbl)
 +{
 +    nbnxn_list_work_t *work;
 +    int                m, i, s, s0, s1;
 +    nbnxn_sci_t       *sci_sort;
 +
 +    if (nbl->ncj4 <= nbl->nsci)
 +    {
 +        /* nsci = 0 or all sci have size 1, sorting won't change the order */
 +        return;
 +    }
 +
 +    work = nbl->work;
 +
 +    /* We will distinguish differences up to double the average */
 +    m = (2*nbl->ncj4)/nbl->nsci;
 +
 +    if (m + 1 > work->sort_nalloc)
 +    {
 +        work->sort_nalloc = over_alloc_large(m + 1);
 +        srenew(work->sort, work->sort_nalloc);
 +    }
 +
 +    if (work->sci_sort_nalloc != nbl->sci_nalloc)
 +    {
 +        work->sci_sort_nalloc = nbl->sci_nalloc;
 +        nbnxn_realloc_void((void **)&work->sci_sort,
 +                           0,
 +                           work->sci_sort_nalloc*sizeof(*work->sci_sort),
 +                           nbl->alloc, nbl->free);
 +    }
 +
 +    /* Count the entries of each size */
 +    for(i = 0; i <= m; i++)
 +    {
 +        work->sort[i] = 0;
 +    }
 +    for(s = 0; s < nbl->nsci; s++)
 +    {
 +        i = min(m, nbl->sci[s].cj4_ind_end - nbl->sci[s].cj4_ind_start);
 +        work->sort[i]++;
 +    }
 +    /* Calculate the offset for each count */
 +    s0           = work->sort[m];
 +    work->sort[m] = 0;
 +    for(i = m - 1; i >= 0; i--)
 +    {
 +        s1            = work->sort[i];
 +        work->sort[i] = work->sort[i + 1] + s0;
 +        s0            = s1;
 +    }
 +
 +    /* Sort entries directly into place */
 +    sci_sort = work->sci_sort;
 +    for(s = 0; s < nbl->nsci; s++)
 +    {
 +        i = min(m, nbl->sci[s].cj4_ind_end - nbl->sci[s].cj4_ind_start);
 +        sci_sort[work->sort[i]++] = nbl->sci[s];
 +    }
 +
 +    /* Swap the sci pointers so we use the new, sorted list */
 +    work->sci_sort = nbl->sci;
 +    nbl->sci       = sci_sort;
 +}
 +
 +/* Make a local or non-local pair-list, depending on iloc */
 +void nbnxn_make_pairlist(const nbnxn_search_t  nbs,
 +                         nbnxn_atomdata_t     *nbat,
 +                         const t_blocka       *excl,
 +                         real                  rlist,
 +                         int                   min_ci_balanced,
 +                         nbnxn_pairlist_set_t *nbl_list,
 +                         int                   iloc,
 +                         int                   nb_kernel_type,
 +                         t_nrnb               *nrnb)
 +{
 +    nbnxn_grid_t *gridi, *gridj;
 +    gmx_bool bGPUCPU;
 +    int nzi, zi, zj0, zj1, zj;
 +    int nsubpair_max;
 +    int th;
 +    int nnbl;
 +    nbnxn_pairlist_t **nbl;
 +    int ci_block;
 +    gmx_bool CombineNBLists;
 +    gmx_bool progBal;
 +    int np_tot, np_noq, np_hlj, nap;
 +
 +    /* Check if we are running hybrid GPU + CPU nbnxn mode */
 +    bGPUCPU = (!nbs->grid[0].bSimple && nbl_list->bSimple);
 +
 +    nnbl            = nbl_list->nnbl;
 +    nbl             = nbl_list->nbl;
 +    CombineNBLists  = nbl_list->bCombined;
 +
 +    if (debug)
 +    {
 +        fprintf(debug, "ns making %d nblists\n", nnbl);
 +    }
 +
 +    nbat->bUseBufferFlags = (nbat->nout > 1);
 +    /* We should re-init the flags before making the first list */
 +    if (nbat->bUseBufferFlags && (LOCAL_I(iloc) || bGPUCPU))
 +    {
 +        init_buffer_flags(&nbat->buffer_flags, nbat->natoms);
 +    }
 +
 +    if (nbl_list->bSimple)
 +    {
 +        switch (nb_kernel_type)
 +        {
 +#ifdef GMX_NBNXN_SIMD_4XN
 +            case nbnxnk4xN_SIMD_4xN:
 +                nbs->icell_set_x = icell_set_x_simd_4xn;
 +                break;
 +#endif
 +#ifdef GMX_NBNXN_SIMD_2XNN
 +            case nbnxnk4xN_SIMD_2xNN:
 +                nbs->icell_set_x = icell_set_x_simd_2xnn;
 +                break;
 +#endif
 +            default:
 +                nbs->icell_set_x = icell_set_x_simple;
 +                break;
 +        }
 +    }
 +    else
 +    {
 +#ifdef NBNXN_SEARCH_BB_SSE
 +        nbs->icell_set_x = icell_set_x_supersub_sse8;
 +#else
 +        nbs->icell_set_x = icell_set_x_supersub;
 +#endif
 +    }
 +
 +    if (LOCAL_I(iloc))
 +    {
 +        /* Only zone (grid) 0 vs 0 */
 +        nzi = 1;
 +        zj0 = 0;
 +        zj1 = 1;
 +    }
 +    else
 +    {
 +        nzi = nbs->zones->nizone;
 +    }
 +
 +    if (!nbl_list->bSimple && min_ci_balanced > 0)
 +    {
 +        nsubpair_max = get_nsubpair_max(nbs, iloc, rlist, min_ci_balanced);
 +    }
 +    else
 +    {
 +        nsubpair_max = 0;
 +    }
 +
 +    /* Clear all pair-lists */
 +    for (th = 0; th < nnbl; th++)
 +    {
 +        clear_pairlist(nbl[th]);
 +    }
 +
 +    for (zi = 0; zi < nzi; zi++)
 +    {
 +        gridi = &nbs->grid[zi];
 +
 +        if (NONLOCAL_I(iloc))
 +        {
 +            zj0 = nbs->zones->izone[zi].j0;
 +            zj1 = nbs->zones->izone[zi].j1;
 +            if (zi == 0)
 +            {
 +                zj0++;
 +            }
 +        }
 +        for (zj = zj0; zj < zj1; zj++)
 +        {
 +            gridj = &nbs->grid[zj];
 +
 +            if (debug)
 +            {
 +                fprintf(debug, "ns search grid %d vs %d\n", zi, zj);
 +            }
 +
 +            nbs_cycle_start(&nbs->cc[enbsCCsearch]);
 +
 +            if (nbl[0]->bSimple && !gridi->bSimple)
 +            {
 +                /* Hybrid list, determine blocking later */
 +                ci_block = 0;
 +            }
 +            else
 +            {
 +                ci_block = get_ci_block_size(gridi, nbs->DomDec, nnbl);
 +            }
 +
 +#pragma omp parallel for num_threads(nnbl) schedule(static)
 +            for (th = 0; th < nnbl; th++)
 +            {
 +                /* Re-init the thread-local work flag data before making
 +                 * the first list (not an elegant conditional).
 +                 */
 +                if (nbat->bUseBufferFlags && ((zi == 0 && zj == 0) ||
 +                                              (bGPUCPU && zi == 0 && zj == 1)))
 +                {
 +                    init_buffer_flags(&nbs->work[th].buffer_flags, nbat->natoms);
 +                }
 +
 +                if (CombineNBLists && th > 0)
 +                {
 +                    clear_pairlist(nbl[th]);
 +                }
 +
 +                /* With GPU: generate progressively smaller lists for
 +                 * load balancing for local only or non-local with 2 zones.
 +                 */
 +                progBal = (LOCAL_I(iloc) || nbs->zones->n <= 2);
 +
 +                /* Divide the i super cell equally over the nblists */
 +                nbnxn_make_pairlist_part(nbs, gridi, gridj,
 +                                         &nbs->work[th], nbat, excl,
 +                                         rlist,
 +                                         nb_kernel_type,
 +                                         ci_block,
 +                                         nbat->bUseBufferFlags,
 +                                         nsubpair_max,
 +                                         progBal, min_ci_balanced,
 +                                         th, nnbl,
 +                                         nbl[th]);
 +            }
 +            nbs_cycle_stop(&nbs->cc[enbsCCsearch]);
 +
 +            np_tot = 0;
 +            np_noq = 0;
 +            np_hlj = 0;
 +            for (th = 0; th < nnbl; th++)
 +            {
 +                inc_nrnb(nrnb, eNR_NBNXN_DIST2, nbs->work[th].ndistc);
 +
 +                if (nbl_list->bSimple)
 +                {
 +                    np_tot += nbl[th]->ncj;
 +                    np_noq += nbl[th]->work->ncj_noq;
 +                    np_hlj += nbl[th]->work->ncj_hlj;
 +                }
 +                else
 +                {
 +                    /* This count ignores potential subsequent pair pruning */
 +                    np_tot += nbl[th]->nci_tot;
 +                }
 +            }
 +            nap                   = nbl[0]->na_ci*nbl[0]->na_cj;
 +            nbl_list->natpair_ljq = (np_tot - np_noq)*nap - np_hlj*nap/2;
 +            nbl_list->natpair_lj  = np_noq*nap;
 +            nbl_list->natpair_q   = np_hlj*nap/2;
 +
 +            if (CombineNBLists && nnbl > 1)
 +            {
 +                nbs_cycle_start(&nbs->cc[enbsCCcombine]);
 +
 +                combine_nblists(nnbl-1, nbl+1, nbl[0]);
 +
 +                nbs_cycle_stop(&nbs->cc[enbsCCcombine]);
 +            }
 +        }
 +    }
 +
 +    if (!nbl_list->bSimple)
 +    {
 +        /* Sort the entries on size, large ones first */
 +        if (CombineNBLists || nnbl == 1)
 +        {
 +            sort_sci(nbl[0]);
 +        }
 +        else
 +        {
 +#pragma omp parallel for num_threads(nnbl) schedule(static)
 +            for (th = 0; th < nnbl; th++)
 +            {
 +                sort_sci(nbl[th]);
 +            }
 +        }
 +    }
 +
 +    if (nbat->bUseBufferFlags)
 +    {
 +        reduce_buffer_flags(nbs, nnbl, &nbat->buffer_flags);
 +    }
 +
 +    /* Special performance logging stuff (env.var. GMX_NBNXN_CYCLE) */
 +    if (LOCAL_I(iloc))
 +    {
 +        nbs->search_count++;
 +    }
 +    if (nbs->print_cycles &&
 +        (!nbs->DomDec || (nbs->DomDec && !LOCAL_I(iloc))) &&
 +        nbs->search_count % 100 == 0)
 +    {
 +        nbs_cycle_print(stderr, nbs);
 +    }
 +
 +    if (debug && (CombineNBLists && nnbl > 1))
 +    {
 +        if (nbl[0]->bSimple)
 +        {
 +            print_nblist_statistics_simple(debug, nbl[0], nbs, rlist);
 +        }
 +        else
 +        {
 +            print_nblist_statistics_supersub(debug, nbl[0], nbs, rlist);
 +        }
 +    }
 +
 +    if (debug)
 +    {
 +        if (gmx_debug_at)
 +        {
 +            if (nbl[0]->bSimple)
 +            {
 +                print_nblist_ci_cj(debug, nbl[0]);
 +            }
 +            else
 +            {
 +                print_nblist_sci_cj(debug, nbl[0]);
 +            }
 +        }
 +
 +        if (nbat->bUseBufferFlags)
 +        {
 +            print_reduction_cost(&nbat->buffer_flags, nnbl);
 +        }
 +    }
 +}
index 4cd29052deba902a503508ad5ba4378e0023b279,0000000000000000000000000000000000000000..5486510b63151ce0320b35c55b7f66f471295f38
mode 100644,000000..100644
--- /dev/null
@@@ -1,2182 -1,0 +1,2188 @@@
-                 aj     = ia[3];
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + *          GROningen MAchine for Chemical Simulations
 + *
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + *
 + * For more info, check our website at http://www.gromacs.org
 + *
 + * And Hey:
 + * GROwing Monsters And Cloning Shrimps
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <stdio.h>
 +#include "typedefs.h"
 +#include "vsite.h"
 +#include "macros.h"
 +#include "smalloc.h"
 +#include "nrnb.h"
 +#include "vec.h"
 +#include "mvdata.h"
 +#include "network.h"
 +#include "mshift.h"
 +#include "pbc.h"
 +#include "domdec.h"
 +#include "partdec.h"
 +#include "mtop_util.h"
 +#include "gmx_omp_nthreads.h"
 +#include "gmx_omp.h"
 +
 +/* Routines to send/receive coordinates and force
 + * of constructing atoms.
 + */
 +
 +static void move_construct_x(t_comm_vsites *vsitecomm, rvec x[], t_commrec *cr)
 +{
 +    rvec *sendbuf;
 +    rvec *recvbuf;
 +    int   i, ia;
 +
 +    sendbuf = vsitecomm->send_buf;
 +    recvbuf = vsitecomm->recv_buf;
 +
 +
 +    /* Prepare pulse left by copying to send buffer */
 +    for (i = 0; i < vsitecomm->left_export_nconstruct; i++)
 +    {
 +        ia = vsitecomm->left_export_construct[i];
 +        copy_rvec(x[ia], sendbuf[i]);
 +    }
 +
 +    /* Pulse coordinates left */
 +    gmx_tx_rx_real(cr, GMX_LEFT, (real *)sendbuf, 3*vsitecomm->left_export_nconstruct, GMX_RIGHT, (real *)recvbuf, 3*vsitecomm->right_import_nconstruct);
 +
 +    /* Copy from receive buffer to coordinate array */
 +    for (i = 0; i < vsitecomm->right_import_nconstruct; i++)
 +    {
 +        ia = vsitecomm->right_import_construct[i];
 +        copy_rvec(recvbuf[i], x[ia]);
 +    }
 +
 +    /* Prepare pulse right by copying to send buffer */
 +    for (i = 0; i < vsitecomm->right_export_nconstruct; i++)
 +    {
 +        ia = vsitecomm->right_export_construct[i];
 +        copy_rvec(x[ia], sendbuf[i]);
 +    }
 +
 +    /* Pulse coordinates right */
 +    gmx_tx_rx_real(cr, GMX_RIGHT, (real *)sendbuf, 3*vsitecomm->right_export_nconstruct, GMX_LEFT, (real *)recvbuf, 3*vsitecomm->left_import_nconstruct);
 +
 +    /* Copy from receive buffer to coordinate array */
 +    for (i = 0; i < vsitecomm->left_import_nconstruct; i++)
 +    {
 +        ia = vsitecomm->left_import_construct[i];
 +        copy_rvec(recvbuf[i], x[ia]);
 +    }
 +}
 +
 +
 +static void move_construct_f(t_comm_vsites *vsitecomm, rvec f[], t_commrec *cr)
 +{
 +    rvec *sendbuf;
 +    rvec *recvbuf;
 +    int   i, ia;
 +
 +    sendbuf = vsitecomm->send_buf;
 +    recvbuf = vsitecomm->recv_buf;
 +
 +    /* Prepare pulse right by copying to send buffer */
 +    for (i = 0; i < vsitecomm->right_import_nconstruct; i++)
 +    {
 +        ia = vsitecomm->right_import_construct[i];
 +        copy_rvec(f[ia], sendbuf[i]);
 +        clear_rvec(f[ia]);     /* Zero it here after moving, just to simplify debug book-keeping... */
 +    }
 +
 +    /* Pulse forces right */
 +    gmx_tx_rx_real(cr, GMX_RIGHT, (real *)sendbuf, 3*vsitecomm->right_import_nconstruct, GMX_LEFT, (real *)recvbuf, 3*vsitecomm->left_export_nconstruct);
 +
 +    /* Copy from receive buffer to coordinate array */
 +    for (i = 0; i < vsitecomm->left_export_nconstruct; i++)
 +    {
 +        ia = vsitecomm->left_export_construct[i];
 +        rvec_inc(f[ia], recvbuf[i]);
 +    }
 +
 +    /* Prepare pulse left by copying to send buffer */
 +    for (i = 0; i < vsitecomm->left_import_nconstruct; i++)
 +    {
 +        ia = vsitecomm->left_import_construct[i];
 +        copy_rvec(f[ia], sendbuf[i]);
 +        clear_rvec(f[ia]);     /* Zero it here after moving, just to simplify debug book-keeping... */
 +    }
 +
 +    /* Pulse coordinates left */
 +    gmx_tx_rx_real(cr, GMX_LEFT, (real *)sendbuf, 3*vsitecomm->left_import_nconstruct, GMX_RIGHT, (real *)recvbuf, 3*vsitecomm->right_export_nconstruct);
 +
 +    /* Copy from receive buffer to coordinate array */
 +    for (i = 0; i < vsitecomm->right_export_nconstruct; i++)
 +    {
 +        ia = vsitecomm->right_export_construct[i];
 +        rvec_inc(f[ia], recvbuf[i]);
 +    }
 +
 +    /* All forces are now on the home processors */
 +}
 +
 +
 +static void
 +pd_clear_nonlocal_constructs(t_comm_vsites *vsitecomm, rvec f[])
 +{
 +    int i, ia;
 +
 +    for (i = 0; i < vsitecomm->left_import_nconstruct; i++)
 +    {
 +        ia = vsitecomm->left_import_construct[i];
 +        clear_rvec(f[ia]);
 +    }
 +    for (i = 0; i < vsitecomm->right_import_nconstruct; i++)
 +    {
 +        ia = vsitecomm->right_import_construct[i];
 +        clear_rvec(f[ia]);
 +    }
 +}
 +
 +
 +
 +static int pbc_rvec_sub(const t_pbc *pbc, const rvec xi, const rvec xj, rvec dx)
 +{
 +    if (pbc)
 +    {
 +        return pbc_dx_aiuc(pbc, xi, xj, dx);
 +    }
 +    else
 +    {
 +        rvec_sub(xi, xj, dx);
 +        return CENTRAL;
 +    }
 +}
 +
 +/* Vsite construction routines */
 +
 +static void constr_vsite2(rvec xi, rvec xj, rvec x, real a, t_pbc *pbc)
 +{
 +    real b;
 +    rvec dx;
 +
 +    b = 1.0-a;
 +    /* 1 flop */
 +
 +    if (pbc)
 +    {
 +        pbc_dx_aiuc(pbc, xj, xi, dx);
 +        x[XX] = xi[XX] + a*dx[XX];
 +        x[YY] = xi[YY] + a*dx[YY];
 +        x[ZZ] = xi[ZZ] + a*dx[ZZ];
 +    }
 +    else
 +    {
 +        x[XX] = b*xi[XX] + a*xj[XX];
 +        x[YY] = b*xi[YY] + a*xj[YY];
 +        x[ZZ] = b*xi[ZZ] + a*xj[ZZ];
 +        /* 9 Flops */
 +    }
 +
 +    /* TOTAL: 10 flops */
 +}
 +
 +static void constr_vsite3(rvec xi, rvec xj, rvec xk, rvec x, real a, real b,
 +                          t_pbc *pbc)
 +{
 +    real c;
 +    rvec dxj, dxk;
 +
 +    c = 1.0-a-b;
 +    /* 2 flops */
 +
 +    if (pbc)
 +    {
 +        pbc_dx_aiuc(pbc, xj, xi, dxj);
 +        pbc_dx_aiuc(pbc, xk, xi, dxk);
 +        x[XX] = xi[XX] + a*dxj[XX] + b*dxk[XX];
 +        x[YY] = xi[YY] + a*dxj[YY] + b*dxk[YY];
 +        x[ZZ] = xi[ZZ] + a*dxj[ZZ] + b*dxk[ZZ];
 +    }
 +    else
 +    {
 +        x[XX] = c*xi[XX] + a*xj[XX] + b*xk[XX];
 +        x[YY] = c*xi[YY] + a*xj[YY] + b*xk[YY];
 +        x[ZZ] = c*xi[ZZ] + a*xj[ZZ] + b*xk[ZZ];
 +        /* 15 Flops */
 +    }
 +
 +    /* TOTAL: 17 flops */
 +}
 +
 +static void constr_vsite3FD(rvec xi, rvec xj, rvec xk, rvec x, real a, real b,
 +                            t_pbc *pbc)
 +{
 +    rvec xij, xjk, temp;
 +    real c;
 +
 +    pbc_rvec_sub(pbc, xj, xi, xij);
 +    pbc_rvec_sub(pbc, xk, xj, xjk);
 +    /* 6 flops */
 +
 +    /* temp goes from i to a point on the line jk */
 +    temp[XX] = xij[XX] + a*xjk[XX];
 +    temp[YY] = xij[YY] + a*xjk[YY];
 +    temp[ZZ] = xij[ZZ] + a*xjk[ZZ];
 +    /* 6 flops */
 +
 +    c = b*gmx_invsqrt(iprod(temp, temp));
 +    /* 6 + 10 flops */
 +
 +    x[XX] = xi[XX] + c*temp[XX];
 +    x[YY] = xi[YY] + c*temp[YY];
 +    x[ZZ] = xi[ZZ] + c*temp[ZZ];
 +    /* 6 Flops */
 +
 +    /* TOTAL: 34 flops */
 +}
 +
 +static void constr_vsite3FAD(rvec xi, rvec xj, rvec xk, rvec x, real a, real b, t_pbc *pbc)
 +{
 +    rvec xij, xjk, xp;
 +    real a1, b1, c1, invdij;
 +
 +    pbc_rvec_sub(pbc, xj, xi, xij);
 +    pbc_rvec_sub(pbc, xk, xj, xjk);
 +    /* 6 flops */
 +
 +    invdij = gmx_invsqrt(iprod(xij, xij));
 +    c1     = invdij * invdij * iprod(xij, xjk);
 +    xp[XX] = xjk[XX] - c1*xij[XX];
 +    xp[YY] = xjk[YY] - c1*xij[YY];
 +    xp[ZZ] = xjk[ZZ] - c1*xij[ZZ];
 +    a1     = a*invdij;
 +    b1     = b*gmx_invsqrt(iprod(xp, xp));
 +    /* 45 */
 +
 +    x[XX] = xi[XX] + a1*xij[XX] + b1*xp[XX];
 +    x[YY] = xi[YY] + a1*xij[YY] + b1*xp[YY];
 +    x[ZZ] = xi[ZZ] + a1*xij[ZZ] + b1*xp[ZZ];
 +    /* 12 Flops */
 +
 +    /* TOTAL: 63 flops */
 +}
 +
 +static void constr_vsite3OUT(rvec xi, rvec xj, rvec xk, rvec x,
 +                             real a, real b, real c, t_pbc *pbc)
 +{
 +    rvec xij, xik, temp;
 +
 +    pbc_rvec_sub(pbc, xj, xi, xij);
 +    pbc_rvec_sub(pbc, xk, xi, xik);
 +    cprod(xij, xik, temp);
 +    /* 15 Flops */
 +
 +    x[XX] = xi[XX] + a*xij[XX] + b*xik[XX] + c*temp[XX];
 +    x[YY] = xi[YY] + a*xij[YY] + b*xik[YY] + c*temp[YY];
 +    x[ZZ] = xi[ZZ] + a*xij[ZZ] + b*xik[ZZ] + c*temp[ZZ];
 +    /* 18 Flops */
 +
 +    /* TOTAL: 33 flops */
 +}
 +
 +static void constr_vsite4FD(rvec xi, rvec xj, rvec xk, rvec xl, rvec x,
 +                            real a, real b, real c, t_pbc *pbc)
 +{
 +    rvec xij, xjk, xjl, temp;
 +    real d;
 +
 +    pbc_rvec_sub(pbc, xj, xi, xij);
 +    pbc_rvec_sub(pbc, xk, xj, xjk);
 +    pbc_rvec_sub(pbc, xl, xj, xjl);
 +    /* 9 flops */
 +
 +    /* temp goes from i to a point on the plane jkl */
 +    temp[XX] = xij[XX] + a*xjk[XX] + b*xjl[XX];
 +    temp[YY] = xij[YY] + a*xjk[YY] + b*xjl[YY];
 +    temp[ZZ] = xij[ZZ] + a*xjk[ZZ] + b*xjl[ZZ];
 +    /* 12 flops */
 +
 +    d = c*gmx_invsqrt(iprod(temp, temp));
 +    /* 6 + 10 flops */
 +
 +    x[XX] = xi[XX] + d*temp[XX];
 +    x[YY] = xi[YY] + d*temp[YY];
 +    x[ZZ] = xi[ZZ] + d*temp[ZZ];
 +    /* 6 Flops */
 +
 +    /* TOTAL: 43 flops */
 +}
 +
 +
 +static void constr_vsite4FDN(rvec xi, rvec xj, rvec xk, rvec xl, rvec x,
 +                             real a, real b, real c, t_pbc *pbc)
 +{
 +    rvec xij, xik, xil, ra, rb, rja, rjb, rm;
 +    real d;
 +
 +    pbc_rvec_sub(pbc, xj, xi, xij);
 +    pbc_rvec_sub(pbc, xk, xi, xik);
 +    pbc_rvec_sub(pbc, xl, xi, xil);
 +    /* 9 flops */
 +
 +    ra[XX] = a*xik[XX];
 +    ra[YY] = a*xik[YY];
 +    ra[ZZ] = a*xik[ZZ];
 +
 +    rb[XX] = b*xil[XX];
 +    rb[YY] = b*xil[YY];
 +    rb[ZZ] = b*xil[ZZ];
 +
 +    /* 6 flops */
 +
 +    rvec_sub(ra, xij, rja);
 +    rvec_sub(rb, xij, rjb);
 +    /* 6 flops */
 +
 +    cprod(rja, rjb, rm);
 +    /* 9 flops */
 +
 +    d = c*gmx_invsqrt(norm2(rm));
 +    /* 5+5+1 flops */
 +
 +    x[XX] = xi[XX] + d*rm[XX];
 +    x[YY] = xi[YY] + d*rm[YY];
 +    x[ZZ] = xi[ZZ] + d*rm[ZZ];
 +    /* 6 Flops */
 +
 +    /* TOTAL: 47 flops */
 +}
 +
 +
 +static int constr_vsiten(t_iatom *ia, t_iparams ip[],
 +                         rvec *x, t_pbc *pbc)
 +{
 +    rvec xs, x1, dx;
 +    dvec dsum;
 +    int  n3, av, ai, i;
 +    real a;
 +
 +    n3 = 3*ip[ia[0]].vsiten.n;
 +    av = ia[1];
 +    ai = ia[2];
 +    copy_rvec(x[ai], x1);
 +    clear_dvec(dsum);
 +    for (i = 3; i < n3; i += 3)
 +    {
 +        ai = ia[i+2];
 +        a  = ip[ia[i]].vsiten.a;
 +        if (pbc)
 +        {
 +            pbc_dx_aiuc(pbc, x[ai], x1, dx);
 +        }
 +        else
 +        {
 +            rvec_sub(x[ai], x1, dx);
 +        }
 +        dsum[XX] += a*dx[XX];
 +        dsum[YY] += a*dx[YY];
 +        dsum[ZZ] += a*dx[ZZ];
 +        /* 9 Flops */
 +    }
 +
 +    x[av][XX] = x1[XX] + dsum[XX];
 +    x[av][YY] = x1[YY] + dsum[YY];
 +    x[av][ZZ] = x1[ZZ] + dsum[ZZ];
 +
 +    return n3;
 +}
 +
 +
 +void construct_vsites_thread(gmx_vsite_t *vsite,
 +                             rvec x[], t_nrnb *nrnb,
 +                             real dt, rvec *v,
 +                             t_iparams ip[], t_ilist ilist[],
 +                             t_pbc *pbc_null)
 +{
 +    gmx_bool   bPBCAll;
 +    rvec       xpbc, xv, vv, dx;
 +    real       a1, b1, c1, inv_dt;
 +    int        i, inc, ii, nra, nr, tp, ftype;
 +    t_iatom    avsite, ai, aj, ak, al, pbc_atom;
 +    t_iatom   *ia;
 +    t_pbc     *pbc_null2;
 +    int       *vsite_pbc, ishift;
 +    rvec       reftmp, vtmp, rtmp;
 +
 +    if (v != NULL)
 +    {
 +        inv_dt = 1.0/dt;
 +    }
 +    else
 +    {
 +        inv_dt = 1.0;
 +    }
 +
 +    bPBCAll = (pbc_null != NULL && !vsite->bHaveChargeGroups);
 +
 +    pbc_null2 = NULL;
 +    vsite_pbc = NULL;
 +    for (ftype = 0; (ftype < F_NRE); ftype++)
 +    {
 +        if ((interaction_function[ftype].flags & IF_VSITE) &&
 +            ilist[ftype].nr > 0)
 +        {
 +            nra    = interaction_function[ftype].nratoms;
 +            inc    = 1 + nra;
 +            nr     = ilist[ftype].nr;
 +            ia     = ilist[ftype].iatoms;
 +
 +            if (bPBCAll)
 +            {
 +                pbc_null2 = pbc_null;
 +            }
 +            else if (pbc_null != NULL)
 +            {
 +                vsite_pbc = vsite->vsite_pbc_loc[ftype-F_VSITE2];
 +            }
 +
 +            for (i = 0; i < nr; )
 +            {
 +                tp   = ia[0];
 +
 +                /* The vsite and constructing atoms */
 +                avsite = ia[1];
 +                ai     = ia[2];
 +
 +                /* Constants for constructing vsites */
 +                a1   = ip[tp].vsite.a;
 +                /* Check what kind of pbc we need to use */
 +                if (bPBCAll)
 +                {
 +                    /* No charge groups, vsite follows its own pbc */
 +                    pbc_atom = avsite;
 +                    copy_rvec(x[avsite], xpbc);
 +                }
 +                else if (vsite_pbc != NULL)
 +                {
 +                    pbc_atom = vsite_pbc[i/(1+nra)];
 +                    if (pbc_atom > -2)
 +                    {
 +                        if (pbc_atom >= 0)
 +                        {
 +                            /* We need to copy the coordinates here,
 +                             * single for single atom cg's pbc_atom
 +                             * is the vsite itself.
 +                             */
 +                            copy_rvec(x[pbc_atom], xpbc);
 +                        }
 +                        pbc_null2 = pbc_null;
 +                    }
 +                    else
 +                    {
 +                        pbc_null2 = NULL;
 +                    }
 +                }
 +                else
 +                {
 +                    pbc_atom = -2;
 +                }
 +                /* Copy the old position */
 +                copy_rvec(x[avsite], xv);
 +
 +                /* Construct the vsite depending on type */
 +                switch (ftype)
 +                {
 +                    case F_VSITE2:
++                        aj = ia[3];
 +                        constr_vsite2(x[ai], x[aj], x[avsite], a1, pbc_null2);
 +                        break;
 +                    case F_VSITE3:
++                        aj = ia[3];
 +                        ak = ia[4];
 +                        b1 = ip[tp].vsite.b;
 +                        constr_vsite3(x[ai], x[aj], x[ak], x[avsite], a1, b1, pbc_null2);
 +                        break;
 +                    case F_VSITE3FD:
++                        aj = ia[3];
 +                        ak = ia[4];
 +                        b1 = ip[tp].vsite.b;
 +                        constr_vsite3FD(x[ai], x[aj], x[ak], x[avsite], a1, b1, pbc_null2);
 +                        break;
 +                    case F_VSITE3FAD:
++                        aj = ia[3];
 +                        ak = ia[4];
 +                        b1 = ip[tp].vsite.b;
 +                        constr_vsite3FAD(x[ai], x[aj], x[ak], x[avsite], a1, b1, pbc_null2);
 +                        break;
 +                    case F_VSITE3OUT:
++                        aj = ia[3];
 +                        ak = ia[4];
 +                        b1 = ip[tp].vsite.b;
 +                        c1 = ip[tp].vsite.c;
 +                        constr_vsite3OUT(x[ai], x[aj], x[ak], x[avsite], a1, b1, c1, pbc_null2);
 +                        break;
 +                    case F_VSITE4FD:
++                        aj = ia[3];
 +                        ak = ia[4];
 +                        al = ia[5];
 +                        b1 = ip[tp].vsite.b;
 +                        c1 = ip[tp].vsite.c;
 +                        constr_vsite4FD(x[ai], x[aj], x[ak], x[al], x[avsite], a1, b1, c1,
 +                                        pbc_null2);
 +                        break;
 +                    case F_VSITE4FDN:
++                        aj = ia[3];
 +                        ak = ia[4];
 +                        al = ia[5];
 +                        b1 = ip[tp].vsite.b;
 +                        c1 = ip[tp].vsite.c;
 +                        constr_vsite4FDN(x[ai], x[aj], x[ak], x[al], x[avsite], a1, b1, c1,
 +                                         pbc_null2);
 +                        break;
 +                    case F_VSITEN:
 +                        inc = constr_vsiten(ia, ip, x, pbc_null2);
 +                        break;
 +                    default:
 +                        gmx_fatal(FARGS, "No such vsite type %d in %s, line %d",
 +                                  ftype, __FILE__, __LINE__);
 +                }
 +
 +                if (pbc_atom >= 0)
 +                {
 +                    /* Match the pbc of this vsite to the rest of its charge group */
 +                    ishift = pbc_dx_aiuc(pbc_null, x[avsite], xpbc, dx);
 +                    if (ishift != CENTRAL)
 +                    {
 +                        rvec_add(xpbc, dx, x[avsite]);
 +                    }
 +                }
 +                if (v != NULL)
 +                {
 +                    /* Calculate velocity of vsite... */
 +                    rvec_sub(x[avsite], xv, vv);
 +                    svmul(inv_dt, vv, v[avsite]);
 +                }
 +
 +                /* Increment loop variables */
 +                i  += inc;
 +                ia += inc;
 +            }
 +        }
 +    }
 +}
 +
 +void construct_vsites(FILE *log, gmx_vsite_t *vsite,
 +                      rvec x[], t_nrnb *nrnb,
 +                      real dt, rvec *v,
 +                      t_iparams ip[], t_ilist ilist[],
 +                      int ePBC, gmx_bool bMolPBC, t_graph *graph,
 +                      t_commrec *cr, matrix box)
 +{
 +    t_pbc     pbc, *pbc_null;
 +    gmx_bool  bDomDec;
 +    int       nthreads;
 +
 +    bDomDec = cr && DOMAINDECOMP(cr);
 +
 +    /* We only need to do pbc when we have inter-cg vsites */
 +    if (ePBC != epbcNONE && (bDomDec || bMolPBC) && vsite->n_intercg_vsite)
 +    {
 +        /* This is wasting some CPU time as we now do this multiple times
 +         * per MD step. But how often do we have vsites with full pbc?
 +         */
 +        pbc_null = set_pbc_dd(&pbc, ePBC, cr != NULL ? cr->dd : NULL, FALSE, box);
 +    }
 +    else
 +    {
 +        pbc_null = NULL;
 +    }
 +
 +    if (cr)
 +    {
 +        if (bDomDec)
 +        {
 +            dd_move_x_vsites(cr->dd, box, x);
 +        }
 +        else if (vsite->bPDvsitecomm)
 +        {
 +            /* I'm not sure whether the periodicity and shift are guaranteed
 +             * to be consistent between different nodes when running e.g. polymers
 +             * in parallel. In this special case we thus unshift/shift,
 +             * but only when necessary. This is to make sure the coordinates
 +             * we move don't end up a box away...
 +             */
 +            if (graph != NULL)
 +            {
 +                unshift_self(graph, box, x);
 +            }
 +
 +            move_construct_x(vsite->vsitecomm, x, cr);
 +
 +            if (graph != NULL)
 +            {
 +                shift_self(graph, box, x);
 +            }
 +        }
 +    }
 +
 +    if (vsite->nthreads == 1)
 +    {
 +        construct_vsites_thread(vsite,
 +                                x, nrnb, dt, v,
 +                                ip, ilist,
 +                                pbc_null);
 +    }
 +    else
 +    {
 +#pragma omp parallel num_threads(vsite->nthreads)
 +        {
 +            construct_vsites_thread(vsite,
 +                                    x, nrnb, dt, v,
 +                                    ip, vsite->tdata[gmx_omp_get_thread_num()].ilist,
 +                                    pbc_null);
 +        }
 +        /* Now we can construct the vsites that might depend on other vsites */
 +        construct_vsites_thread(vsite,
 +                                x, nrnb, dt, v,
 +                                ip, vsite->tdata[vsite->nthreads].ilist,
 +                                pbc_null);
 +    }
 +}
 +
 +static void spread_vsite2(t_iatom ia[], real a,
 +                          rvec x[], rvec f[], rvec fshift[],
 +                          t_pbc *pbc, t_graph *g)
 +{
 +    rvec    fi, fj, dx;
 +    t_iatom av, ai, aj;
 +    ivec    di;
 +    real    b;
 +    int     siv, sij;
 +
 +    av = ia[1];
 +    ai = ia[2];
 +    aj = ia[3];
 +
 +    svmul(1-a, f[av], fi);
 +    svmul(  a, f[av], fj);
 +    /* 7 flop */
 +
 +    rvec_inc(f[ai], fi);
 +    rvec_inc(f[aj], fj);
 +    /* 6 Flops */
 +
 +    if (g)
 +    {
 +        ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, av), di);
 +        siv = IVEC2IS(di);
 +        ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), di);
 +        sij = IVEC2IS(di);
 +    }
 +    else if (pbc)
 +    {
 +        siv = pbc_dx_aiuc(pbc, x[ai], x[av], dx);
 +        sij = pbc_dx_aiuc(pbc, x[ai], x[aj], dx);
 +    }
 +    else
 +    {
 +        siv = CENTRAL;
 +        sij = CENTRAL;
 +    }
 +
 +    if (fshift && (siv != CENTRAL || sij != CENTRAL))
 +    {
 +        rvec_inc(fshift[siv], f[av]);
 +        rvec_dec(fshift[CENTRAL], fi);
 +        rvec_dec(fshift[sij], fj);
 +    }
 +
 +    /* TOTAL: 13 flops */
 +}
 +
 +void construct_vsites_mtop(FILE *log, gmx_vsite_t *vsite,
 +                           gmx_mtop_t *mtop, rvec x[])
 +{
 +    int             as, mb, mol;
 +    gmx_molblock_t *molb;
 +    gmx_moltype_t  *molt;
 +
 +    as = 0;
 +    for (mb = 0; mb < mtop->nmolblock; mb++)
 +    {
 +        molb = &mtop->molblock[mb];
 +        molt = &mtop->moltype[molb->type];
 +        for (mol = 0; mol < molb->nmol; mol++)
 +        {
 +            construct_vsites(log, vsite, x+as, NULL, 0.0, NULL,
 +                             mtop->ffparams.iparams, molt->ilist,
 +                             epbcNONE, TRUE, NULL, NULL, NULL);
 +            as += molt->atoms.nr;
 +        }
 +    }
 +}
 +
 +static void spread_vsite3(t_iatom ia[], real a, real b,
 +                          rvec x[], rvec f[], rvec fshift[],
 +                          t_pbc *pbc, t_graph *g)
 +{
 +    rvec    fi, fj, fk, dx;
 +    atom_id av, ai, aj, ak;
 +    ivec    di;
 +    int     siv, sij, sik;
 +
 +    av = ia[1];
 +    ai = ia[2];
 +    aj = ia[3];
 +    ak = ia[4];
 +
 +    svmul(1-a-b, f[av], fi);
 +    svmul(    a, f[av], fj);
 +    svmul(    b, f[av], fk);
 +    /* 11 flops */
 +
 +    rvec_inc(f[ai], fi);
 +    rvec_inc(f[aj], fj);
 +    rvec_inc(f[ak], fk);
 +    /* 9 Flops */
 +
 +    if (g)
 +    {
 +        ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, ia[1]), di);
 +        siv = IVEC2IS(di);
 +        ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), di);
 +        sij = IVEC2IS(di);
 +        ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, ak), di);
 +        sik = IVEC2IS(di);
 +    }
 +    else if (pbc)
 +    {
 +        siv = pbc_dx_aiuc(pbc, x[ai], x[av], dx);
 +        sij = pbc_dx_aiuc(pbc, x[ai], x[aj], dx);
 +        sik = pbc_dx_aiuc(pbc, x[ai], x[ak], dx);
 +    }
 +    else
 +    {
 +        siv = CENTRAL;
 +        sij = CENTRAL;
 +        sik = CENTRAL;
 +    }
 +
 +    if (fshift && (siv != CENTRAL || sij != CENTRAL || sik != CENTRAL))
 +    {
 +        rvec_inc(fshift[siv], f[av]);
 +        rvec_dec(fshift[CENTRAL], fi);
 +        rvec_dec(fshift[sij], fj);
 +        rvec_dec(fshift[sik], fk);
 +    }
 +
 +    /* TOTAL: 20 flops */
 +}
 +
 +static void spread_vsite3FD(t_iatom ia[], real a, real b,
 +                            rvec x[], rvec f[], rvec fshift[],
 +                            gmx_bool VirCorr, matrix dxdf,
 +                            t_pbc *pbc, t_graph *g)
 +{
 +    real    fx, fy, fz, c, invl, fproj, a1;
 +    rvec    xvi, xij, xjk, xix, fv, temp;
 +    t_iatom av, ai, aj, ak;
 +    int     svi, sji, skj, d;
 +    ivec    di;
 +
 +    av = ia[1];
 +    ai = ia[2];
 +    aj = ia[3];
 +    ak = ia[4];
 +    copy_rvec(f[av], fv);
 +
 +    sji = pbc_rvec_sub(pbc, x[aj], x[ai], xij);
 +    skj = pbc_rvec_sub(pbc, x[ak], x[aj], xjk);
 +    /* 6 flops */
 +
 +    /* xix goes from i to point x on the line jk */
 +    xix[XX] = xij[XX]+a*xjk[XX];
 +    xix[YY] = xij[YY]+a*xjk[YY];
 +    xix[ZZ] = xij[ZZ]+a*xjk[ZZ];
 +    /* 6 flops */
 +
 +    invl = gmx_invsqrt(iprod(xix, xix));
 +    c    = b*invl;
 +    /* 4 + ?10? flops */
 +
 +    fproj = iprod(xix, fv)*invl*invl; /* = (xix . f)/(xix . xix) */
 +
 +    temp[XX] = c*(fv[XX]-fproj*xix[XX]);
 +    temp[YY] = c*(fv[YY]-fproj*xix[YY]);
 +    temp[ZZ] = c*(fv[ZZ]-fproj*xix[ZZ]);
 +    /* 16 */
 +
 +    /* c is already calculated in constr_vsite3FD
 +       storing c somewhere will save 26 flops!     */
 +
 +    a1         = 1-a;
 +    f[ai][XX] += fv[XX] - temp[XX];
 +    f[ai][YY] += fv[YY] - temp[YY];
 +    f[ai][ZZ] += fv[ZZ] - temp[ZZ];
 +    f[aj][XX] += a1*temp[XX];
 +    f[aj][YY] += a1*temp[YY];
 +    f[aj][ZZ] += a1*temp[ZZ];
 +    f[ak][XX] += a*temp[XX];
 +    f[ak][YY] += a*temp[YY];
 +    f[ak][ZZ] += a*temp[ZZ];
 +    /* 19 Flops */
 +
 +    if (g)
 +    {
 +        ivec_sub(SHIFT_IVEC(g, ia[1]), SHIFT_IVEC(g, ai), di);
 +        svi = IVEC2IS(di);
 +        ivec_sub(SHIFT_IVEC(g, aj), SHIFT_IVEC(g, ai), di);
 +        sji = IVEC2IS(di);
 +        ivec_sub(SHIFT_IVEC(g, ak), SHIFT_IVEC(g, aj), di);
 +        skj = IVEC2IS(di);
 +    }
 +    else if (pbc)
 +    {
 +        svi = pbc_rvec_sub(pbc, x[av], x[ai], xvi);
 +    }
 +    else
 +    {
 +        svi = CENTRAL;
 +    }
 +
 +    if (fshift && (svi != CENTRAL || sji != CENTRAL || skj != CENTRAL))
 +    {
 +        rvec_dec(fshift[svi], fv);
 +        fshift[CENTRAL][XX] += fv[XX] - (1 + a)*temp[XX];
 +        fshift[CENTRAL][YY] += fv[YY] - (1 + a)*temp[YY];
 +        fshift[CENTRAL][ZZ] += fv[ZZ] - (1 + a)*temp[ZZ];
 +        fshift[    sji][XX] += temp[XX];
 +        fshift[    sji][YY] += temp[YY];
 +        fshift[    sji][ZZ] += temp[ZZ];
 +        fshift[    skj][XX] += a*temp[XX];
 +        fshift[    skj][YY] += a*temp[YY];
 +        fshift[    skj][ZZ] += a*temp[ZZ];
 +    }
 +
 +    if (VirCorr)
 +    {
 +        /* When VirCorr=TRUE, the virial for the current forces is not
 +         * calculated from the redistributed forces. This means that
 +         * the effect of non-linear virtual site constructions on the virial
 +         * needs to be added separately. This contribution can be calculated
 +         * in many ways, but the simplest and cheapest way is to use
 +         * the first constructing atom ai as a reference position in space:
 +         * subtract (xv-xi)*fv and add (xj-xi)*fj + (xk-xi)*fk.
 +         */
 +        rvec xiv;
 +        int  i, j;
 +
 +        pbc_rvec_sub(pbc, x[av], x[ai], xiv);
 +
 +        for (i = 0; i < DIM; i++)
 +        {
 +            for (j = 0; j < DIM; j++)
 +            {
 +                /* As xix is a linear combination of j and k, use that here */
 +                dxdf[i][j] += -xiv[i]*fv[j] + xix[i]*temp[j];
 +            }
 +        }
 +    }
 +
 +    /* TOTAL: 61 flops */
 +}
 +
 +static void spread_vsite3FAD(t_iatom ia[], real a, real b,
 +                             rvec x[], rvec f[], rvec fshift[],
 +                             gmx_bool VirCorr, matrix dxdf,
 +                             t_pbc *pbc, t_graph *g)
 +{
 +    rvec    xvi, xij, xjk, xperp, Fpij, Fppp, fv, f1, f2, f3;
 +    real    a1, b1, c1, c2, invdij, invdij2, invdp, fproj;
 +    t_iatom av, ai, aj, ak;
 +    int     svi, sji, skj, d;
 +    ivec    di;
 +
 +    av = ia[1];
 +    ai = ia[2];
 +    aj = ia[3];
 +    ak = ia[4];
 +    copy_rvec(f[ia[1]], fv);
 +
 +    sji = pbc_rvec_sub(pbc, x[aj], x[ai], xij);
 +    skj = pbc_rvec_sub(pbc, x[ak], x[aj], xjk);
 +    /* 6 flops */
 +
 +    invdij    = gmx_invsqrt(iprod(xij, xij));
 +    invdij2   = invdij * invdij;
 +    c1        = iprod(xij, xjk) * invdij2;
 +    xperp[XX] = xjk[XX] - c1*xij[XX];
 +    xperp[YY] = xjk[YY] - c1*xij[YY];
 +    xperp[ZZ] = xjk[ZZ] - c1*xij[ZZ];
 +    /* xperp in plane ijk, perp. to ij */
 +    invdp = gmx_invsqrt(iprod(xperp, xperp));
 +    a1    = a*invdij;
 +    b1    = b*invdp;
 +    /* 45 flops */
 +
 +    /* a1, b1 and c1 are already calculated in constr_vsite3FAD
 +       storing them somewhere will save 45 flops!     */
 +
 +    fproj = iprod(xij, fv)*invdij2;
 +    svmul(fproj,                      xij,  Fpij);    /* proj. f on xij */
 +    svmul(iprod(xperp, fv)*invdp*invdp, xperp, Fppp); /* proj. f on xperp */
 +    svmul(b1*fproj,                   xperp, f3);
 +    /* 23 flops */
 +
 +    rvec_sub(fv, Fpij, f1); /* f1 = f - Fpij */
 +    rvec_sub(f1, Fppp, f2); /* f2 = f - Fpij - Fppp */
 +    for (d = 0; (d < DIM); d++)
 +    {
 +        f1[d] *= a1;
 +        f2[d] *= b1;
 +    }
 +    /* 12 flops */
 +
 +    c2         = 1+c1;
 +    f[ai][XX] += fv[XX] - f1[XX] + c1*f2[XX] + f3[XX];
 +    f[ai][YY] += fv[YY] - f1[YY] + c1*f2[YY] + f3[YY];
 +    f[ai][ZZ] += fv[ZZ] - f1[ZZ] + c1*f2[ZZ] + f3[ZZ];
 +    f[aj][XX] +=          f1[XX] - c2*f2[XX] - f3[XX];
 +    f[aj][YY] +=          f1[YY] - c2*f2[YY] - f3[YY];
 +    f[aj][ZZ] +=          f1[ZZ] - c2*f2[ZZ] - f3[ZZ];
 +    f[ak][XX] +=                      f2[XX];
 +    f[ak][YY] +=                      f2[YY];
 +    f[ak][ZZ] +=                      f2[ZZ];
 +    /* 30 Flops */
 +
 +    if (g)
 +    {
 +        ivec_sub(SHIFT_IVEC(g, ia[1]), SHIFT_IVEC(g, ai), di);
 +        svi = IVEC2IS(di);
 +        ivec_sub(SHIFT_IVEC(g, aj), SHIFT_IVEC(g, ai), di);
 +        sji = IVEC2IS(di);
 +        ivec_sub(SHIFT_IVEC(g, ak), SHIFT_IVEC(g, aj), di);
 +        skj = IVEC2IS(di);
 +    }
 +    else if (pbc)
 +    {
 +        svi = pbc_rvec_sub(pbc, x[av], x[ai], xvi);
 +    }
 +    else
 +    {
 +        svi = CENTRAL;
 +    }
 +
 +    if (fshift && (svi != CENTRAL || sji != CENTRAL || skj != CENTRAL))
 +    {
 +        rvec_dec(fshift[svi], fv);
 +        fshift[CENTRAL][XX] += fv[XX] - f1[XX] - (1-c1)*f2[XX] + f3[XX];
 +        fshift[CENTRAL][YY] += fv[YY] - f1[YY] - (1-c1)*f2[YY] + f3[YY];
 +        fshift[CENTRAL][ZZ] += fv[ZZ] - f1[ZZ] - (1-c1)*f2[ZZ] + f3[ZZ];
 +        fshift[    sji][XX] +=          f1[XX] -    c1 *f2[XX] - f3[XX];
 +        fshift[    sji][YY] +=          f1[YY] -    c1 *f2[YY] - f3[YY];
 +        fshift[    sji][ZZ] +=          f1[ZZ] -    c1 *f2[ZZ] - f3[ZZ];
 +        fshift[    skj][XX] +=                          f2[XX];
 +        fshift[    skj][YY] +=                          f2[YY];
 +        fshift[    skj][ZZ] +=                          f2[ZZ];
 +    }
 +
 +    if (VirCorr)
 +    {
 +        rvec xiv;
 +        int  i, j;
 +
 +        pbc_rvec_sub(pbc, x[av], x[ai], xiv);
 +
 +        for (i = 0; i < DIM; i++)
 +        {
 +            for (j = 0; j < DIM; j++)
 +            {
 +                /* Note that xik=xij+xjk, so we have to add xij*f2 */
 +                dxdf[i][j] +=
 +                    -xiv[i]*fv[j]
 +                    + xij[i]*(f1[j] + (1 - c2)*f2[j] - f3[j])
 +                    + xjk[i]*f2[j];
 +            }
 +        }
 +    }
 +
 +    /* TOTAL: 113 flops */
 +}
 +
 +static void spread_vsite3OUT(t_iatom ia[], real a, real b, real c,
 +                             rvec x[], rvec f[], rvec fshift[],
 +                             gmx_bool VirCorr, matrix dxdf,
 +                             t_pbc *pbc, t_graph *g)
 +{
 +    rvec    xvi, xij, xik, fv, fj, fk;
 +    real    cfx, cfy, cfz;
 +    atom_id av, ai, aj, ak;
 +    ivec    di;
 +    int     svi, sji, ski;
 +
 +    av = ia[1];
 +    ai = ia[2];
 +    aj = ia[3];
 +    ak = ia[4];
 +
 +    sji = pbc_rvec_sub(pbc, x[aj], x[ai], xij);
 +    ski = pbc_rvec_sub(pbc, x[ak], x[ai], xik);
 +    /* 6 Flops */
 +
 +    copy_rvec(f[av], fv);
 +
 +    cfx = c*fv[XX];
 +    cfy = c*fv[YY];
 +    cfz = c*fv[ZZ];
 +    /* 3 Flops */
 +
 +    fj[XX] = a*fv[XX]     -  xik[ZZ]*cfy +  xik[YY]*cfz;
 +    fj[YY] =  xik[ZZ]*cfx + a*fv[YY]     -  xik[XX]*cfz;
 +    fj[ZZ] = -xik[YY]*cfx +  xik[XX]*cfy + a*fv[ZZ];
 +
 +    fk[XX] = b*fv[XX]     +  xij[ZZ]*cfy -  xij[YY]*cfz;
 +    fk[YY] = -xij[ZZ]*cfx + b*fv[YY]     +  xij[XX]*cfz;
 +    fk[ZZ] =  xij[YY]*cfx -  xij[XX]*cfy + b*fv[ZZ];
 +    /* 30 Flops */
 +
 +    f[ai][XX] += fv[XX] - fj[XX] - fk[XX];
 +    f[ai][YY] += fv[YY] - fj[YY] - fk[YY];
 +    f[ai][ZZ] += fv[ZZ] - fj[ZZ] - fk[ZZ];
 +    rvec_inc(f[aj], fj);
 +    rvec_inc(f[ak], fk);
 +    /* 15 Flops */
 +
 +    if (g)
 +    {
 +        ivec_sub(SHIFT_IVEC(g, ia[1]), SHIFT_IVEC(g, ai), di);
 +        svi = IVEC2IS(di);
 +        ivec_sub(SHIFT_IVEC(g, aj), SHIFT_IVEC(g, ai), di);
 +        sji = IVEC2IS(di);
 +        ivec_sub(SHIFT_IVEC(g, ak), SHIFT_IVEC(g, ai), di);
 +        ski = IVEC2IS(di);
 +    }
 +    else if (pbc)
 +    {
 +        svi = pbc_rvec_sub(pbc, x[av], x[ai], xvi);
 +    }
 +    else
 +    {
 +        svi = CENTRAL;
 +    }
 +
 +    if (fshift && (svi != CENTRAL || sji != CENTRAL || ski != CENTRAL))
 +    {
 +        rvec_dec(fshift[svi], fv);
 +        fshift[CENTRAL][XX] += fv[XX] - fj[XX] - fk[XX];
 +        fshift[CENTRAL][YY] += fv[YY] - fj[YY] - fk[YY];
 +        fshift[CENTRAL][ZZ] += fv[ZZ] - fj[ZZ] - fk[ZZ];
 +        rvec_inc(fshift[sji], fj);
 +        rvec_inc(fshift[ski], fk);
 +    }
 +
 +    if (VirCorr)
 +    {
 +        rvec xiv;
 +        int  i, j;
 +
 +        pbc_rvec_sub(pbc, x[av], x[ai], xiv);
 +
 +        for (i = 0; i < DIM; i++)
 +        {
 +            for (j = 0; j < DIM; j++)
 +            {
 +                dxdf[i][j] += -xiv[i]*fv[j] + xij[i]*fj[j] + xik[i]*fk[j];
 +            }
 +        }
 +    }
 +
 +    /* TOTAL: 54 flops */
 +}
 +
 +static void spread_vsite4FD(t_iatom ia[], real a, real b, real c,
 +                            rvec x[], rvec f[], rvec fshift[],
 +                            gmx_bool VirCorr, matrix dxdf,
 +                            t_pbc *pbc, t_graph *g)
 +{
 +    real    d, invl, fproj, a1;
 +    rvec    xvi, xij, xjk, xjl, xix, fv, temp;
 +    atom_id av, ai, aj, ak, al;
 +    ivec    di;
 +    int     svi, sji, skj, slj, m;
 +
 +    av = ia[1];
 +    ai = ia[2];
 +    aj = ia[3];
 +    ak = ia[4];
 +    al = ia[5];
 +
 +    sji = pbc_rvec_sub(pbc, x[aj], x[ai], xij);
 +    skj = pbc_rvec_sub(pbc, x[ak], x[aj], xjk);
 +    slj = pbc_rvec_sub(pbc, x[al], x[aj], xjl);
 +    /* 9 flops */
 +
 +    /* xix goes from i to point x on the plane jkl */
 +    for (m = 0; m < DIM; m++)
 +    {
 +        xix[m] = xij[m] + a*xjk[m] + b*xjl[m];
 +    }
 +    /* 12 flops */
 +
 +    invl = gmx_invsqrt(iprod(xix, xix));
 +    d    = c*invl;
 +    /* 4 + ?10? flops */
 +
 +    copy_rvec(f[av], fv);
 +
 +    fproj = iprod(xix, fv)*invl*invl; /* = (xix . f)/(xix . xix) */
 +
 +    for (m = 0; m < DIM; m++)
 +    {
 +        temp[m] = d*(fv[m] - fproj*xix[m]);
 +    }
 +    /* 16 */
 +
 +    /* c is already calculated in constr_vsite3FD
 +       storing c somewhere will save 35 flops!     */
 +
 +    a1 = 1 - a - b;
 +    for (m = 0; m < DIM; m++)
 +    {
 +        f[ai][m] += fv[m] - temp[m];
 +        f[aj][m] += a1*temp[m];
 +        f[ak][m] += a*temp[m];
 +        f[al][m] += b*temp[m];
 +    }
 +    /* 26 Flops */
 +
 +    if (g)
 +    {
 +        ivec_sub(SHIFT_IVEC(g, ia[1]), SHIFT_IVEC(g, ai), di);
 +        svi = IVEC2IS(di);
 +        ivec_sub(SHIFT_IVEC(g, aj), SHIFT_IVEC(g, ai), di);
 +        sji = IVEC2IS(di);
 +        ivec_sub(SHIFT_IVEC(g, ak), SHIFT_IVEC(g, aj), di);
 +        skj = IVEC2IS(di);
 +        ivec_sub(SHIFT_IVEC(g, al), SHIFT_IVEC(g, aj), di);
 +        slj = IVEC2IS(di);
 +    }
 +    else if (pbc)
 +    {
 +        svi = pbc_rvec_sub(pbc, x[av], x[ai], xvi);
 +    }
 +    else
 +    {
 +        svi = CENTRAL;
 +    }
 +
 +    if (fshift &&
 +        (svi != CENTRAL || sji != CENTRAL || skj != CENTRAL || slj != CENTRAL))
 +    {
 +        rvec_dec(fshift[svi], fv);
 +        for (m = 0; m < DIM; m++)
 +        {
 +            fshift[CENTRAL][m] += fv[m] - (1 + a + b)*temp[m];
 +            fshift[    sji][m] += temp[m];
 +            fshift[    skj][m] += a*temp[m];
 +            fshift[    slj][m] += b*temp[m];
 +        }
 +    }
 +
 +    if (VirCorr)
 +    {
 +        rvec xiv;
 +        int  i, j;
 +
 +        pbc_rvec_sub(pbc, x[av], x[ai], xiv);
 +
 +        for (i = 0; i < DIM; i++)
 +        {
 +            for (j = 0; j < DIM; j++)
 +            {
 +                dxdf[i][j] += -xiv[i]*fv[j] + xix[i]*temp[j];
 +            }
 +        }
 +    }
 +
 +    /* TOTAL: 77 flops */
 +}
 +
 +
 +static void spread_vsite4FDN(t_iatom ia[], real a, real b, real c,
 +                             rvec x[], rvec f[], rvec fshift[],
 +                             gmx_bool VirCorr, matrix dxdf,
 +                             t_pbc *pbc, t_graph *g)
 +{   /* Spread the force on a 4FDN virtual site, f[ia[1]], onto its four
 +     * constructing atoms ia[2..5]. The vectors ra, rb, rja, rjb and rm are
 +     * built from a and b exactly as in constr_vsite4FDN. fshift (may be
 +     * NULL) is updated when any shift index is not CENTRAL; with VirCorr
 +     * set the virial correction is added to dxdf.
 +     */
 +    rvec xvi, xij, xik, xil, ra, rb, rja, rjb, rab, rm, rt;
 +    rvec fv, fj, fk, fl;
 +    real invrm, denom;
 +    real cfx, cfy, cfz;
 +    ivec di;
 +    int  av, ai, aj, ak, al;
 +    int  svi, sij, sik, sil;
 +
 +    /* ia[1] is the vsite; ia[2..5] are the constructing atoms i, j, k, l */
 +    av = ia[1];
 +    ai = ia[2];
 +    aj = ia[3];
 +    ak = ia[4];
 +    al = ia[5];
 +
 +    copy_rvec(f[av], fv);
 +
 +    sij = pbc_rvec_sub(pbc, x[aj], x[ai], xij);
 +    sik = pbc_rvec_sub(pbc, x[ak], x[ai], xik);
 +    sil = pbc_rvec_sub(pbc, x[al], x[ai], xil);
 +    /* 9 flops */
 +
 +    ra[XX] = a*xik[XX];
 +    ra[YY] = a*xik[YY];
 +    ra[ZZ] = a*xik[ZZ];
 +
 +    rb[XX] = b*xil[XX];
 +    rb[YY] = b*xil[YY];
 +    rb[ZZ] = b*xil[ZZ];
 +
 +    /* 6 flops */
 +
 +    rvec_sub(ra, xij, rja);
 +    rvec_sub(rb, xij, rjb);
 +    rvec_sub(rb, ra, rab);
 +    /* 9 flops */
 +
 +    cprod(rja, rjb, rm);
 +    /* 9 flops */
 +
 +    invrm = gmx_invsqrt(norm2(rm));
 +    denom = invrm*invrm;
 +    /* 5+5+2 flops; denom is invrm squared */
 +
 +    cfx = c*invrm*fv[XX];
 +    cfy = c*invrm*fv[YY];
 +    cfz = c*invrm*fv[ZZ];
 +    /* 6 Flops; (cfx, cfy, cfz) is the vsite force scaled by c*invrm */
 +
 +    cprod(rm, rab, rt);
 +    /* 9 flops */
 +
 +    rt[XX] *= denom;
 +    rt[YY] *= denom;
 +    rt[ZZ] *= denom;
 +    /* 3flops */
 +
 +    fj[XX] = (        -rm[XX]*rt[XX]) * cfx + ( rab[ZZ]-rm[YY]*rt[XX]) * cfy + (-rab[YY]-rm[ZZ]*rt[XX]) * cfz;
 +    fj[YY] = (-rab[ZZ]-rm[XX]*rt[YY]) * cfx + (        -rm[YY]*rt[YY]) * cfy + ( rab[XX]-rm[ZZ]*rt[YY]) * cfz;
 +    fj[ZZ] = ( rab[YY]-rm[XX]*rt[ZZ]) * cfx + (-rab[XX]-rm[YY]*rt[ZZ]) * cfy + (        -rm[ZZ]*rt[ZZ]) * cfz;
 +    /* 30 flops; fj is a 3x3 matrix built from rab, rm and rt applied
 +     * to (cfx, cfy, cfz)
 +     */
 +
 +    cprod(rjb, rm, rt);
 +    /* 9 flops */
 +
 +    rt[XX] *= denom*a;
 +    rt[YY] *= denom*a;
 +    rt[ZZ] *= denom*a;
 +    /* 3flops */
 +
 +    fk[XX] = (          -rm[XX]*rt[XX]) * cfx + (-a*rjb[ZZ]-rm[YY]*rt[XX]) * cfy + ( a*rjb[YY]-rm[ZZ]*rt[XX]) * cfz;
 +    fk[YY] = ( a*rjb[ZZ]-rm[XX]*rt[YY]) * cfx + (          -rm[YY]*rt[YY]) * cfy + (-a*rjb[XX]-rm[ZZ]*rt[YY]) * cfz;
 +    fk[ZZ] = (-a*rjb[YY]-rm[XX]*rt[ZZ]) * cfx + ( a*rjb[XX]-rm[YY]*rt[ZZ]) * cfy + (          -rm[ZZ]*rt[ZZ]) * cfz;
 +    /* 36 flops; same pattern as fj, with a*rjb in place of rab */
 +
 +    cprod(rm, rja, rt);
 +    /* 9 flops */
 +
 +    rt[XX] *= denom*b;
 +    rt[YY] *= denom*b;
 +    rt[ZZ] *= denom*b;
 +    /* 3flops */
 +
 +    fl[XX] = (          -rm[XX]*rt[XX]) * cfx + ( b*rja[ZZ]-rm[YY]*rt[XX]) * cfy + (-b*rja[YY]-rm[ZZ]*rt[XX]) * cfz;
 +    fl[YY] = (-b*rja[ZZ]-rm[XX]*rt[YY]) * cfx + (          -rm[YY]*rt[YY]) * cfy + ( b*rja[XX]-rm[ZZ]*rt[YY]) * cfz;
 +    fl[ZZ] = ( b*rja[YY]-rm[XX]*rt[ZZ]) * cfx + (-b*rja[XX]-rm[YY]*rt[ZZ]) * cfy + (          -rm[ZZ]*rt[ZZ]) * cfz;
 +    /* 36 flops; same pattern as fj, with b*rja in place of rab */
 +
 +    f[ai][XX] += fv[XX] - fj[XX] - fk[XX] - fl[XX];
 +    f[ai][YY] += fv[YY] - fj[YY] - fk[YY] - fl[YY];
 +    f[ai][ZZ] += fv[ZZ] - fj[ZZ] - fk[ZZ] - fl[ZZ];
 +    rvec_inc(f[aj], fj);
 +    rvec_inc(f[ak], fk);
 +    rvec_inc(f[al], fl);
 +    /* 21 flops; atom i receives whatever is not assigned to j, k, l */
 +
 +    if (g)
 +    {
 +        ivec_sub(SHIFT_IVEC(g, av), SHIFT_IVEC(g, ai), di);
 +        svi = IVEC2IS(di);
 +        ivec_sub(SHIFT_IVEC(g, aj), SHIFT_IVEC(g, ai), di);
 +        sij = IVEC2IS(di);
 +        ivec_sub(SHIFT_IVEC(g, ak), SHIFT_IVEC(g, ai), di);
 +        sik = IVEC2IS(di);
 +        ivec_sub(SHIFT_IVEC(g, al), SHIFT_IVEC(g, ai), di);
 +        sil = IVEC2IS(di);
 +    }
 +    else if (pbc)
 +    {
 +        svi = pbc_rvec_sub(pbc, x[av], x[ai], xvi); /* xvi itself is not used further */
 +    }
 +    else
 +    {
 +        svi = CENTRAL;
 +    }
 +
 +    if (fshift && (svi != CENTRAL || sij != CENTRAL || sik != CENTRAL || sil != CENTRAL))
 +    {
 +        rvec_dec(fshift[svi], fv);
 +        fshift[CENTRAL][XX] += fv[XX] - fj[XX] - fk[XX] - fl[XX];
 +        fshift[CENTRAL][YY] += fv[YY] - fj[YY] - fk[YY] - fl[YY];
 +        fshift[CENTRAL][ZZ] += fv[ZZ] - fj[ZZ] - fk[ZZ] - fl[ZZ];
 +        rvec_inc(fshift[sij], fj);
 +        rvec_inc(fshift[sik], fk);
 +        rvec_inc(fshift[sil], fl);
 +    }
 +
 +    if (VirCorr)
 +    {
 +        rvec xiv;
 +        int  i, j;
 +
 +        pbc_rvec_sub(pbc, x[av], x[ai], xiv);
 +
 +        for (i = 0; i < DIM; i++)
 +        {
 +            for (j = 0; j < DIM; j++)
 +            {
 +                dxdf[i][j] += -xiv[i]*fv[j] + xij[i]*fj[j] + xik[i]*fk[j] + xil[i]*fl[j];
 +            }
 +        }
 +    }
 +
 +    /* Total: 207 flops (Yuck!) */
 +}
 +
 +
 +static int spread_vsiten(t_iatom ia[], t_iparams ip[],
 +                         rvec x[], rvec f[], rvec fshift[],
 +                         t_pbc *pbc, t_graph *g)
 +{
 +    rvec xv, dx, fi;
 +    int  n3, av, i, ai;
 +    real a;
 +    ivec di;
 +    int  siv;
 +
 +    n3 = 3*ip[ia[0]].vsiten.n;
 +    av = ia[1];
 +    copy_rvec(x[av], xv);
 +
 +    for (i = 0; i < n3; i += 3)
 +    {
 +        ai = ia[i+2];
 +        if (g)
 +        {
 +            ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, av), di);
 +            siv = IVEC2IS(di);
 +        }
 +        else if (pbc)
 +        {
 +            siv = pbc_dx_aiuc(pbc, x[ai], xv, dx);
 +        }
 +        else
 +        {
 +            siv = CENTRAL;
 +        }
 +        a = ip[ia[i]].vsiten.a;
 +        svmul(a, f[av], fi);
 +        rvec_inc(f[ai], fi);
 +        if (fshift && siv != CENTRAL)
 +        {
 +            rvec_inc(fshift[siv], fi);
 +            rvec_dec(fshift[CENTRAL], fi);
 +        }
 +        /* 6 Flops */
 +    }
 +
 +    return n3;
 +}
 +
 +
 +static int vsite_count(const t_ilist *ilist, int ftype)
 +{
 +    if (ftype == F_VSITEN)
 +    {
 +        return ilist[ftype].nr/3;
 +    }
 +    else
 +    {
 +        return ilist[ftype].nr/(1 + interaction_function[ftype].nratoms);
 +    }
 +}
 +
 +static void spread_vsite_f_thread(gmx_vsite_t *vsite,
 +                                  rvec x[], rvec f[], rvec *fshift,
 +                                  gmx_bool VirCorr, matrix dxdf,
 +                                  t_iparams ip[], t_ilist ilist[],
 +                                  t_graph *g, t_pbc *pbc_null)
 +{
 +    gmx_bool   bPBCAll;
 +    real       a1, b1, c1;
 +    int        i, inc, m, nra, nr, tp, ftype;
 +    t_iatom   *ia;
 +    t_pbc     *pbc_null2;
 +    int       *vsite_pbc;
 +
 +    if (VirCorr)
 +    {
 +        clear_mat(dxdf);
 +    }
 +
 +    bPBCAll = (pbc_null != NULL && !vsite->bHaveChargeGroups);
 +
 +    /* this loop goes backwards to be able to build *
 +     * higher type vsites from lower types         */
 +    pbc_null2 = NULL;
 +    vsite_pbc = NULL;
 +    for (ftype = F_NRE-1; (ftype >= 0); ftype--)
 +    {
 +        if ((interaction_function[ftype].flags & IF_VSITE) &&
 +            ilist[ftype].nr > 0)
 +        {
 +            nra    = interaction_function[ftype].nratoms;
 +            inc    = 1 + nra;
 +            nr     = ilist[ftype].nr;
 +            ia     = ilist[ftype].iatoms;
 +
 +            if (bPBCAll)
 +            {
 +                pbc_null2 = pbc_null;
 +            }
 +            else if (pbc_null != NULL)
 +            {
 +                vsite_pbc = vsite->vsite_pbc_loc[ftype-F_VSITE2];
 +            }
 +
 +            for (i = 0; i < nr; )
 +            {
 +                if (vsite_pbc != NULL)
 +                {
 +                    if (vsite_pbc[i/(1+nra)] > -2)
 +                    {
 +                        pbc_null2 = pbc_null;
 +                    }
 +                    else
 +                    {
 +                        pbc_null2 = NULL;
 +                    }
 +                }
 +
 +                tp   = ia[0];
 +
 +                /* Constants for constructing */
 +                a1   = ip[tp].vsite.a;
 +                /* Construct the vsite depending on type */
 +                switch (ftype)
 +                {
 +                    case F_VSITE2:
 +                        spread_vsite2(ia, a1, x, f, fshift, pbc_null2, g);
 +                        break;
 +                    case F_VSITE3:
 +                        b1 = ip[tp].vsite.b;
 +                        spread_vsite3(ia, a1, b1, x, f, fshift, pbc_null2, g);
 +                        break;
 +                    case F_VSITE3FD:
 +                        b1 = ip[tp].vsite.b;
 +                        spread_vsite3FD(ia, a1, b1, x, f, fshift, VirCorr, dxdf, pbc_null2, g);
 +                        break;
 +                    case F_VSITE3FAD:
 +                        b1 = ip[tp].vsite.b;
 +                        spread_vsite3FAD(ia, a1, b1, x, f, fshift, VirCorr, dxdf, pbc_null2, g);
 +                        break;
 +                    case F_VSITE3OUT:
 +                        b1 = ip[tp].vsite.b;
 +                        c1 = ip[tp].vsite.c;
 +                        spread_vsite3OUT(ia, a1, b1, c1, x, f, fshift, VirCorr, dxdf, pbc_null2, g);
 +                        break;
 +                    case F_VSITE4FD:
 +                        b1 = ip[tp].vsite.b;
 +                        c1 = ip[tp].vsite.c;
 +                        spread_vsite4FD(ia, a1, b1, c1, x, f, fshift, VirCorr, dxdf, pbc_null2, g);
 +                        break;
 +                    case F_VSITE4FDN:
 +                        b1 = ip[tp].vsite.b;
 +                        c1 = ip[tp].vsite.c;
 +                        spread_vsite4FDN(ia, a1, b1, c1, x, f, fshift, VirCorr, dxdf, pbc_null2, g);
 +                        break;
 +                    case F_VSITEN:
 +                        inc = spread_vsiten(ia, ip, x, f, fshift, pbc_null2, g);
 +                        break;
 +                    default:
 +                        gmx_fatal(FARGS, "No such vsite type %d in %s, line %d",
 +                                  ftype, __FILE__, __LINE__);
 +                }
 +                clear_rvec(f[ia[1]]);
 +
 +                /* Increment loop variables */
 +                i  += inc;
 +                ia += inc;
 +            }
 +        }
 +    }
 +}
 +
 +/* Spread the forces on all virtual sites onto their constructing atoms.
 + *
 + * The steps, in order:
 + *  - optionally set up a pbc struct (only with inter-charge-group vsites
 + *    and either domain decomposition or bMolPBC),
 + *  - clear the forces on non-local constructing atoms (domain or
 + *    particle decomposition),
 + *  - spread the forces, either in one call (vsite->nthreads == 1) or with
 + *    one serial call for the extra "overlap" list followed by an OpenMP
 + *    parallel region with one call per thread,
 + *  - when VirCorr is set, add -0.5 times the accumulated dxdf matrices
 + *    to vir,
 + *  - communicate the spread forces (domain or particle decomposition),
 + *  - update the flop counters in nrnb.
 + *
 + * fshift may be NULL, in which case no shift forces are accumulated.
 + * The log argument is not referenced in this function.
 + */
 +void spread_vsite_f(FILE *log, gmx_vsite_t *vsite,
 +                    rvec x[], rvec f[], rvec *fshift,
 +                    gmx_bool VirCorr, matrix vir,
 +                    t_nrnb *nrnb, t_idef *idef,
 +                    int ePBC, gmx_bool bMolPBC, t_graph *g, matrix box,
 +                    t_commrec *cr)
 +{
 +    t_pbc pbc, *pbc_null;
 +    int   th;
 +
 +    /* We only need to do pbc when we have inter-cg vsites */
 +    if ((DOMAINDECOMP(cr) || bMolPBC) && vsite->n_intercg_vsite)
 +    {
 +        /* This is wasting some CPU time as we now do this multiple times
 +         * per MD step. But how often do we have vsites with full pbc?
 +         */
 +        pbc_null = set_pbc_dd(&pbc, ePBC, cr->dd, FALSE, box);
 +    }
 +    else
 +    {
 +        pbc_null = NULL;
 +    }
 +
 +    if (DOMAINDECOMP(cr))
 +    {
 +        dd_clear_f_vsites(cr->dd, f);
 +    }
 +    else if (PARTDECOMP(cr) && vsite->vsitecomm != NULL)
 +    {
 +        pd_clear_nonlocal_constructs(vsite->vsitecomm, f);
 +    }
 +
 +    if (vsite->nthreads == 1)
 +    {
 +        /* Single thread: use the complete interaction lists in idef */
 +        spread_vsite_f_thread(vsite,
 +                              x, f, fshift,
 +                              VirCorr, vsite->tdata[0].dxdf,
 +                              idef->iparams, idef->il,
 +                              g, pbc_null);
 +    }
 +    else
 +    {
 +        /* First spread the vsites that might depend on other vsites */
 +        spread_vsite_f_thread(vsite,
 +                              x, f, fshift,
 +                              VirCorr, vsite->tdata[vsite->nthreads].dxdf,
 +                              idef->iparams,
 +                              vsite->tdata[vsite->nthreads].ilist,
 +                              g, pbc_null);
 +
 +#pragma omp parallel num_threads(vsite->nthreads)
 +        {
 +            int   thread;
 +            rvec *fshift_t;
 +
 +            thread = gmx_omp_get_thread_num();
 +
 +            /* Thread 0 writes directly into fshift; the other threads
 +             * use their own, freshly cleared, shift force buffer.
 +             */
 +            if (thread == 0 || fshift == NULL)
 +            {
 +                fshift_t = fshift;
 +            }
 +            else
 +            {
 +                int i;
 +
 +                fshift_t = vsite->tdata[thread].fshift;
 +
 +                for (i = 0; i < SHIFTS; i++)
 +                {
 +                    clear_rvec(fshift_t[i]);
 +                }
 +            }
 +
 +            spread_vsite_f_thread(vsite,
 +                                  x, f, fshift_t,
 +                                  VirCorr, vsite->tdata[thread].dxdf,
 +                                  idef->iparams,
 +                                  vsite->tdata[thread].ilist,
 +                                  g, pbc_null);
 +        }
 +
 +        /* Reduce the shift forces of threads 1 and up into fshift */
 +        if (fshift != NULL)
 +        {
 +            int i;
 +
 +            for (th = 1; th < vsite->nthreads; th++)
 +            {
 +                for (i = 0; i < SHIFTS; i++)
 +                {
 +                    rvec_inc(fshift[i], vsite->tdata[th].fshift[i]);
 +                }
 +            }
 +        }
 +    }
 +
 +    if (VirCorr)
 +    {
 +        int i, j;
 +
 +        /* With multiple threads there is one extra dxdf matrix,
 +         * at index nthreads, from the serial overlap call above.
 +         */
 +        for (th = 0; th < (vsite->nthreads == 1 ? 1 : vsite->nthreads+1); th++)
 +        {
 +            for (i = 0; i < DIM; i++)
 +            {
 +                for (j = 0; j < DIM; j++)
 +                {
 +                    vir[i][j] += -0.5*vsite->tdata[th].dxdf[i][j];
 +                }
 +            }
 +        }
 +    }
 +
 +    if (DOMAINDECOMP(cr))
 +    {
 +        dd_move_f_vsites(cr->dd, f, fshift);
 +    }
 +    else if (vsite->bPDvsitecomm)
 +    {
 +        /* We only move forces here, and they are independent of shifts */
 +        move_construct_f(vsite->vsitecomm, f, cr);
 +    }
 +
 +    /* Flop accounting uses the complete lists in idef, not the thread lists */
 +    inc_nrnb(nrnb, eNR_VSITE2,   vsite_count(idef->il, F_VSITE2));
 +    inc_nrnb(nrnb, eNR_VSITE3,   vsite_count(idef->il, F_VSITE3));
 +    inc_nrnb(nrnb, eNR_VSITE3FD, vsite_count(idef->il, F_VSITE3FD));
 +    inc_nrnb(nrnb, eNR_VSITE3FAD, vsite_count(idef->il, F_VSITE3FAD));
 +    inc_nrnb(nrnb, eNR_VSITE3OUT, vsite_count(idef->il, F_VSITE3OUT));
 +    inc_nrnb(nrnb, eNR_VSITE4FD, vsite_count(idef->il, F_VSITE4FD));
 +    inc_nrnb(nrnb, eNR_VSITE4FDN, vsite_count(idef->il, F_VSITE4FDN));
 +    inc_nrnb(nrnb, eNR_VSITEN,   vsite_count(idef->il, F_VSITEN));
 +}
 +
 +static int *atom2cg(t_block *cgs)
 +{
 +    int *a2cg, cg, i;
 +
 +    snew(a2cg, cgs->index[cgs->nr]);
 +    for (cg = 0; cg < cgs->nr; cg++)
 +    {
 +        for (i = cgs->index[cg]; i < cgs->index[cg+1]; i++)
 +        {
 +            a2cg[i] = cg;
 +        }
 +    }
 +
 +    return a2cg;
 +}
 +
 +static int count_intercg_vsite(gmx_mtop_t *mtop,
 +                               gmx_bool   *bHaveChargeGroups)
 +{
 +    int             mb, mt, ftype, nral, i, cg, a;
 +    gmx_molblock_t *molb;
 +    gmx_moltype_t  *molt;
 +    int            *a2cg;
 +    t_ilist        *il;
 +    t_iatom        *ia;
 +    int             n_intercg_vsite;
 +
 +    *bHaveChargeGroups = FALSE;
 +
 +    n_intercg_vsite = 0;
 +    for (mb = 0; mb < mtop->nmolblock; mb++)
 +    {
 +        molb = &mtop->molblock[mb];
 +        molt = &mtop->moltype[molb->type];
 +
 +        if (molt->cgs.nr < molt->atoms.nr)
 +        {
 +            *bHaveChargeGroups = TRUE;
 +        }
 +
 +        a2cg = atom2cg(&molt->cgs);
 +        for (ftype = 0; ftype < F_NRE; ftype++)
 +        {
 +            if (interaction_function[ftype].flags & IF_VSITE)
 +            {
 +                nral = NRAL(ftype);
 +                il   = &molt->ilist[ftype];
 +                ia   = il->iatoms;
 +                for (i = 0; i < il->nr; i += 1+nral)
 +                {
 +                    cg = a2cg[ia[1+i]];
 +                    for (a = 1; a < nral; a++)
 +                    {
 +                        if (a2cg[ia[1+a]] != cg)
 +                        {
 +                            n_intercg_vsite += molb->nmol;
 +                            break;
 +                        }
 +                    }
 +                }
 +            }
 +        }
 +        sfree(a2cg);
 +    }
 +
 +    return n_intercg_vsite;
 +}
 +
 +/* Determine, for every virtual site in ilist, how its periodic image
 + * should be chosen, and return the result as a newly allocated table.
 + *
 + * The returned array has F_VSITEN-F_VSITE2+1 rows, indexed by
 + * ftype-F_VSITE2; each row holds one int per vsite entry:
 + *   -2   the vsite and all its constructing atoms are in one charge group,
 + *   -1   constructing atoms are in other charge groups and no reference
 + *        atom was selected,
 + *   >=0  an atom index: either the vsite itself (first processed atom of
 + *        a charge group in which no atom has its pbc set yet) or another
 + *        atom in the vsite's charge group whose pbc was already set.
 + *
 + * Particle types are read from atom when it is non-NULL and from md
 + * when that is non-NULL; a2cg maps atom indices to charge group indices
 + * in cgs.
 + */
 +static int **get_vsite_pbc(t_iparams *iparams, t_ilist *ilist,
 +                           t_atom *atom, t_mdatoms *md,
 +                           t_block *cgs, int *a2cg)
 +{
 +    int      ftype, nral, i, j, vsi, vsite, cg_v, cg_c, a, nc3 = 0;
 +    t_ilist *il;
 +    t_iatom *ia;
 +    int    **vsite_pbc, *vsite_pbc_f;
 +    char    *pbc_set;
 +    gmx_bool bViteOnlyCG_and_FirstAtom;
 +
 +    /* Make an array that tells if the pbc of an atom is set */
 +    snew(pbc_set, cgs->index[cgs->nr]);
 +    /* PBC is set for all non vsites */
 +    for (a = 0; a < cgs->index[cgs->nr]; a++)
 +    {
 +        if ((atom && atom[a].ptype != eptVSite) ||
 +            (md   && md->ptype[a]  != eptVSite))
 +        {
 +            pbc_set[a] = 1;
 +        }
 +    }
 +
 +    snew(vsite_pbc, F_VSITEN-F_VSITE2+1);
 +
 +    for (ftype = 0; ftype < F_NRE; ftype++)
 +    {
 +        if (interaction_function[ftype].flags & IF_VSITE)
 +        {
 +            nral = NRAL(ftype);
 +            il   = &ilist[ftype];
 +            ia   = il->iatoms;
 +
 +            snew(vsite_pbc[ftype-F_VSITE2], il->nr/(1+nral));
 +            vsite_pbc_f = vsite_pbc[ftype-F_VSITE2];
 +
 +            i = 0;
 +            while (i < il->nr)
 +            {
 +                vsi   = i/(1+nral);
 +                vsite = ia[i+1];
 +                cg_v  = a2cg[vsite];
 +                /* A value of -2 signals that this vsite and its constructing
 +                 * atoms are all within the same cg, so no pbc is required.
 +                 */
 +                vsite_pbc_f[vsi] = -2;
 +                /* Check if constructing atoms are outside the vsite's cg */
 +                nc3 = 0;
 +                if (ftype == F_VSITEN)
 +                {
 +                    /* One (type, vsite, atom) triple per constructing atom */
 +                    nc3 = 3*iparams[ia[i]].vsiten.n;
 +                    for (j = 0; j < nc3; j += 3)
 +                    {
 +                        if (a2cg[ia[i+j+2]] != cg_v)
 +                        {
 +                            vsite_pbc_f[vsi] = -1;
 +                        }
 +                    }
 +                }
 +                else
 +                {
 +                    for (a = 1; a < nral; a++)
 +                    {
 +                        if (a2cg[ia[i+1+a]] != cg_v)
 +                        {
 +                            vsite_pbc_f[vsi] = -1;
 +                        }
 +                    }
 +                }
 +                if (vsite_pbc_f[vsi] == -1)
 +                {
 +                    /* Check if this is the first processed atom of a vsite only cg */
 +                    bViteOnlyCG_and_FirstAtom = TRUE;
 +                    for (a = cgs->index[cg_v]; a < cgs->index[cg_v+1]; a++)
 +                    {
 +                        /* Non-vsites already have pbc set, so simply check for pbc_set */
 +                        if (pbc_set[a])
 +                        {
 +                            bViteOnlyCG_and_FirstAtom = FALSE;
 +                            break;
 +                        }
 +                    }
 +                    if (bViteOnlyCG_and_FirstAtom)
 +                    {
 +                        /* First processed atom of a vsite only charge group.
 +                         * The pbc of the input coordinates to construct_vsites
 +                         * should be preserved.
 +                         */
 +                        vsite_pbc_f[vsi] = vsite;
 +                    }
 +                    else if (cg_v != a2cg[ia[1+i+1]])
 +                    {
 +                        /* This vsite has a different charge group index
 +                         * than its first constructing atom
 +                         * and the charge group has more than one atom,
 +                         * search for the first normal particle
 +                         * or vsite that already had its pbc defined.
 +                         * If nothing is found, use full pbc for this vsite.
 +                         */
 +                        for (a = cgs->index[cg_v]; a < cgs->index[cg_v+1]; a++)
 +                        {
 +                            if (a != vsite && pbc_set[a])
 +                            {
 +                                vsite_pbc_f[vsi] = a;
 +                                if (gmx_debug_at)
 +                                {
 +                                    fprintf(debug, "vsite %d match pbc with atom %d\n",
 +                                            vsite+1, a+1);
 +                                }
 +                                break;
 +                            }
 +                        }
 +                        if (gmx_debug_at)
 +                        {
 +                            fprintf(debug, "vsite atom %d  cg %d - %d pbc atom %d\n",
 +                                    vsite+1, cgs->index[cg_v]+1, cgs->index[cg_v+1],
 +                                    vsite_pbc_f[vsi]+1);
 +                        }
 +                    }
 +                }
 +                if (ftype == F_VSITEN)
 +                {
 +                    /* The other entries in vsite_pbc_f are not used for center vsites */
 +                    i += nc3;
 +                }
 +                else
 +                {
 +                    i += 1+nral;
 +                }
 +
 +                /* This vsite now has its pbc defined */
 +                pbc_set[vsite] = 1;
 +            }
 +        }
 +    }
 +
 +    sfree(pbc_set);
 +
 +    return vsite_pbc;
 +}
 +
 +
 +gmx_vsite_t *init_vsite(gmx_mtop_t *mtop, t_commrec *cr,
 +                        gmx_bool bSerial_NoPBC)
 +{
 +    int            nvsite, i;
 +    int           *a2cg, cg;
 +    gmx_vsite_t   *vsite;
 +    int            mt;
 +    gmx_moltype_t *molt;
 +    int            nthreads;
 +
 +    /* check if there are vsites */
 +    nvsite = 0;
 +    for (i = 0; i < F_NRE; i++)
 +    {
 +        if (interaction_function[i].flags & IF_VSITE)
 +        {
 +            nvsite += gmx_mtop_ftype_count(mtop, i);
 +        }
 +    }
 +
 +    if (nvsite == 0)
 +    {
 +        return NULL;
 +    }
 +
 +    snew(vsite, 1);
 +
 +    vsite->n_intercg_vsite = count_intercg_vsite(mtop,
 +                                                 &vsite->bHaveChargeGroups);
 +
 +    /* If we don't have charge groups, the vsite follows its own pbc */
 +    if (!bSerial_NoPBC &&
 +        vsite->bHaveChargeGroups &&
 +        vsite->n_intercg_vsite > 0 && DOMAINDECOMP(cr))
 +    {
 +        vsite->nvsite_pbc_molt = mtop->nmoltype;
 +        snew(vsite->vsite_pbc_molt, vsite->nvsite_pbc_molt);
 +        for (mt = 0; mt < mtop->nmoltype; mt++)
 +        {
 +            molt = &mtop->moltype[mt];
 +            /* Make an atom to charge group index */
 +            a2cg = atom2cg(&molt->cgs);
 +            vsite->vsite_pbc_molt[mt] = get_vsite_pbc(mtop->ffparams.iparams,
 +                                                      molt->ilist,
 +                                                      molt->atoms.atom, NULL,
 +                                                      &molt->cgs, a2cg);
 +            sfree(a2cg);
 +        }
 +
 +        snew(vsite->vsite_pbc_loc_nalloc, F_VSITEN-F_VSITE2+1);
 +        snew(vsite->vsite_pbc_loc, F_VSITEN-F_VSITE2+1);
 +    }
 +
 +    if (bSerial_NoPBC)
 +    {
 +        vsite->nthreads = 1;
 +    }
 +    else
 +    {
 +        vsite->nthreads = gmx_omp_nthreads_get(emntVSITE);
 +    }
 +    if (!bSerial_NoPBC)
 +    {
 +        /* We need one extra thread data structure for the overlap vsites */
 +        snew(vsite->tdata, vsite->nthreads+1);
 +    }
 +
 +    vsite->th_ind        = NULL;
 +    vsite->th_ind_nalloc = 0;
 +
 +    return vsite;
 +}
 +
 +static void prepare_vsite_thread(const t_ilist      *ilist,
 +                                 gmx_vsite_thread_t *vsite_th)
 +{
 +    int ftype;
 +
 +    for (ftype = 0; ftype < F_NRE; ftype++)
 +    {
 +        if (interaction_function[ftype].flags & IF_VSITE)
 +        {
 +            if (ilist[ftype].nr > vsite_th->ilist[ftype].nalloc)
 +            {
 +                vsite_th->ilist[ftype].nalloc = over_alloc_large(ilist[ftype].nr);
 +                srenew(vsite_th->ilist[ftype].iatoms, vsite_th->ilist[ftype].nalloc);
 +            }
 +
 +            vsite_th->ilist[ftype].nr = 0;
 +        }
 +    }
 +}
 +
 +void split_vsites_over_threads(const t_ilist   *ilist,
 +                               const t_mdatoms *mdatoms,
 +                               gmx_bool         bLimitRange,
 +                               gmx_vsite_t     *vsite)
 +{
 +    int      th;
 +    int      vsite_atom_range, natperthread;
 +    int     *th_ind;
 +    int      ftype;
 +    t_iatom *iat;
 +    t_ilist *il_th;
 +    int      nral1, inc, i, j;
 +
 +    if (vsite->nthreads == 1)
 +    {
 +        /* Nothing to do */
 +        return;
 +    }
 +
 +#pragma omp parallel for num_threads(vsite->nthreads) schedule(static)
 +    for (th = 0; th < vsite->nthreads; th++)
 +    {
 +        prepare_vsite_thread(ilist, &vsite->tdata[th]);
 +    }
 +    /* Master threads does the (potential) overlap vsites */
 +    prepare_vsite_thread(ilist, &vsite->tdata[vsite->nthreads]);
 +
 +    /* The current way of distributing the vsites over threads in primitive.
 +     * We divide the atom range 0 - natoms_in_vsite uniformly over threads,
 +     * without taking into account how the vsites are distributed.
 +     * Without domain decomposition we bLimitRange=TRUE and we at least
 +     * tighten the upper bound of the range (useful for common systems
 +     * such as a vsite-protein in 3-site water).
 +     */
 +    if (bLimitRange)
 +    {
 +        vsite_atom_range = -1;
 +        for (ftype = 0; ftype < F_NRE; ftype++)
 +        {
 +            if ((interaction_function[ftype].flags & IF_VSITE) &&
 +                ftype != F_VSITEN)
 +            {
 +                nral1 = 1 + NRAL(ftype);
 +                iat   = ilist[ftype].iatoms;
 +                for (i = 0; i < ilist[ftype].nr; i += nral1)
 +                {
 +                    for (j = i+1; j < i+nral1; j++)
 +                    {
 +                        vsite_atom_range = max(vsite_atom_range, iat[j]);
 +                    }
 +                }
 +            }
 +        }
 +        vsite_atom_range++;
 +    }
 +    else
 +    {
 +        vsite_atom_range = mdatoms->homenr;
 +    }
 +    natperthread = (vsite_atom_range + vsite->nthreads - 1)/vsite->nthreads;
 +
 +    if (debug)
 +    {
 +        fprintf(debug, "virtual site thread dist: natoms %d, range %d, natperthread %d\n", mdatoms->nr, vsite_atom_range, natperthread);
 +    }
 +
 +    /* To simplify the vsite assignment, we make an index which tells us
 +     * to which thread particles, both non-vsites and vsites, are assigned.
 +     */
 +    if (mdatoms->nr > vsite->th_ind_nalloc)
 +    {
 +        vsite->th_ind_nalloc = over_alloc_large(mdatoms->nr);
 +        srenew(vsite->th_ind, vsite->th_ind_nalloc);
 +    }
 +    th_ind = vsite->th_ind;
 +    th     = 0;
 +    for (i = 0; i < mdatoms->nr; i++)
 +    {
 +        if (mdatoms->ptype[i] == eptVSite)
 +        {
 +            /* vsites are not assigned to a thread yet */
 +            th_ind[i] = -1;
 +        }
 +        else
 +        {
 +            /* assign non-vsite particles to thread th */
 +            th_ind[i] = th;
 +        }
 +        if (i == (th + 1)*natperthread && th < vsite->nthreads)
 +        {
 +            th++;
 +        }
 +    }
 +
 +    for (ftype = 0; ftype < F_NRE; ftype++)
 +    {
 +        if ((interaction_function[ftype].flags & IF_VSITE) &&
 +            ftype != F_VSITEN)
 +        {
 +            nral1 = 1 + NRAL(ftype);
 +            inc   = nral1;
 +            iat   = ilist[ftype].iatoms;
 +            for (i = 0; i < ilist[ftype].nr; )
 +            {
 +                th = iat[1+i]/natperthread;
 +                /* We would like to assign this vsite the thread th,
 +                 * but it might depend on atoms outside the atom range of th
 +                 * or on another vsite not assigned to thread th.
 +                 */
 +                if (ftype != F_VSITEN)
 +                {
 +                    for (j = i+2; j < i+nral1; j++)
 +                    {
 +                        if (th_ind[iat[j]] != th)
 +                        {
 +                            /* Some constructing atoms are not assigned to
 +                             * thread th, move this vsite to a separate batch.
 +                             */
 +                            th = vsite->nthreads;
 +                        }
 +                    }
 +                }
 +                else
 +                {
 +                    inc = iat[i];
 +                    for (j = i+2; j < i+inc; j += 3)
 +                    {
 +                        if (th_ind[iat[j]] != th)
 +                        {
 +                            th = vsite->nthreads;
 +                        }
 +                    }
 +                }
 +                /* Copy this vsite to the thread data struct of thread th */
 +                il_th = &vsite->tdata[th].ilist[ftype];
 +                for (j = i; j < i+inc; j++)
 +                {
 +                    il_th->iatoms[il_th->nr++] = iat[j];
 +                }
 +                /* Update this vsite's thread index entry */
 +                th_ind[iat[1+i]] = th;
 +
 +                i += inc;
 +            }
 +        }
 +    }
 +
 +    if (debug)
 +    {
 +        for (ftype = 0; ftype < F_NRE; ftype++)
 +        {
 +            if ((interaction_function[ftype].flags & IF_VSITE) &&
 +                ilist[ftype].nr > 0)
 +            {
 +                fprintf(debug, "%-20s thread dist:",
 +                        interaction_function[ftype].longname);
 +                for (th = 0; th < vsite->nthreads+1; th++)
 +                {
 +                    fprintf(debug, " %4d", vsite->tdata[th].ilist[ftype].nr);
 +                }
 +                fprintf(debug, "\n");
 +            }
 +        }
 +    }
 +}
 +
 +void set_vsite_top(gmx_vsite_t *vsite, gmx_localtop_t *top, t_mdatoms *md,
 +                   t_commrec *cr)
 +{
 +    int *a2cg;
 +
 +    if (vsite->n_intercg_vsite > 0)
 +    {
 +        if (vsite->bHaveChargeGroups)
 +        {
 +            /* Make an atom to charge group index */
 +            a2cg                 = atom2cg(&top->cgs);
 +            vsite->vsite_pbc_loc = get_vsite_pbc(top->idef.iparams,
 +                                                 top->idef.il, NULL, md,
 +                                                 &top->cgs, a2cg);
 +            sfree(a2cg);
 +        }
 +
 +        if (PARTDECOMP(cr))
 +        {
 +            snew(vsite->vsitecomm, 1);
 +            vsite->bPDvsitecomm =
 +                setup_parallel_vsites(&(top->idef), cr, vsite->vsitecomm);
 +        }
 +    }
 +
 +    if (vsite->nthreads > 1)
 +    {
 +        if (vsite->bHaveChargeGroups || PARTDECOMP(cr))
 +        {
 +            gmx_incons("Can not use threads virtual sites combined with charge groups or particle decomposition");
 +        }
 +
 +        split_vsites_over_threads(top->idef.il, md, !DOMAINDECOMP(cr), vsite);
 +    }
 +}
index 292e23d06d61a6281c34cde7ea714c59863dc4ba,0000000000000000000000000000000000000000..fd9d033b50395a90a6a3758cf924fcf4364f8611
mode 100644,000000..100644
--- /dev/null
@@@ -1,2226 -1,0 +1,2230 @@@
-     /* PME tuning is only supported with GPUs or PME nodes and not with rerun */
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + *          GROningen MAchine for Chemical Simulations
 + *
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + *
 + * For more info, check our website at http://www.gromacs.org
 + *
 + * And Hey:
 + * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include "typedefs.h"
 +#include "smalloc.h"
 +#include "sysstuff.h"
 +#include "vec.h"
 +#include "statutil.h"
 +#include "vcm.h"
 +#include "mdebin.h"
 +#include "nrnb.h"
 +#include "calcmu.h"
 +#include "index.h"
 +#include "vsite.h"
 +#include "update.h"
 +#include "ns.h"
 +#include "trnio.h"
 +#include "xtcio.h"
 +#include "mdrun.h"
 +#include "md_support.h"
 +#include "md_logging.h"
 +#include "confio.h"
 +#include "network.h"
 +#include "pull.h"
 +#include "xvgr.h"
 +#include "physics.h"
 +#include "names.h"
 +#include "xmdrun.h"
 +#include "ionize.h"
 +#include "disre.h"
 +#include "orires.h"
 +#include "pme.h"
 +#include "mdatoms.h"
 +#include "repl_ex.h"
 +#include "qmmm.h"
 +#include "domdec.h"
 +#include "domdec_network.h"
 +#include "partdec.h"
 +#include "topsort.h"
 +#include "coulomb.h"
 +#include "constr.h"
 +#include "shellfc.h"
 +#include "compute_io.h"
 +#include "mvdata.h"
 +#include "checkpoint.h"
 +#include "mtop_util.h"
 +#include "sighandler.h"
 +#include "txtdump.h"
 +#include "string2.h"
 +#include "pme_loadbal.h"
 +#include "bondf.h"
 +#include "membed.h"
 +#include "types/nlistheuristics.h"
 +#include "types/iteratedconstraints.h"
 +#include "nbnxn_cuda_data_mgmt.h"
 +
 +#ifdef GMX_LIB_MPI
 +#include <mpi.h>
 +#endif
 +#ifdef GMX_THREAD_MPI
 +#include "tmpi.h"
 +#endif
 +
 +#ifdef GMX_FAHCORE
 +#include "corewrap.h"
 +#endif
 +
 +static void reset_all_counters(FILE *fplog, t_commrec *cr,
 +                               gmx_large_int_t step,
 +                               gmx_large_int_t *step_rel, t_inputrec *ir,
 +                               gmx_wallcycle_t wcycle, t_nrnb *nrnb,
 +                               gmx_runtime_t *runtime,
 +                               nbnxn_cuda_ptr_t cu_nbv)
 +{   /* Restart the performance accounting part-way through a run.
 +     *
 +     * Announces the reset, resets the CUDA timings when cu_nbv tests
 +     * non-zero, stops the ewcRUN wallcycle counter, resets all wallcycle
 +     * counters, the domain decomposition statistics (only when
 +     * DOMAINDECOMP(cr) holds) and nrnb, then starts ewcRUN and the
 +     * runtime clock again and prints a "Restarted time" stamp.
 +     *
 +     * fplog     log stream handed to md_print_warn()/print_date_and_time().
 +     * cr        supplies the decomposition test, cr->dd and cr->nodeid.
 +     * step      used only to format the announcement.
 +     * step_rel  in/out: its value is folded into ir (see below) and it is
 +     *           then set to 0.
 +     * ir        init_step and nsteps are modified.
 +     * wcycle, nrnb, runtime, cu_nbv
 +     *           the counters that are reset / restarted.
 +     */
 +    char sbuf[STEPSTRSIZE]; /* scratch buffer for gmx_step_str() */
 +
 +    /* Reset all the counters related to performance over the run */
 +    md_print_warn(cr, fplog, "step %s: resetting all time and cycle counters\n",
 +                  gmx_step_str(step, sbuf));
 +
 +    if (cu_nbv)
 +    {
 +        nbnxn_cuda_reset_timings(cu_nbv);
 +    }
 +
 +    wallcycle_stop(wcycle, ewcRUN); /* paired with the wallcycle_start() further down */
 +    wallcycle_reset_all(wcycle);
 +    if (DOMAINDECOMP(cr))
 +    {
 +        reset_dd_statistics_counters(cr->dd);
 +    }
 +    init_nrnb(nrnb);
 +    ir->init_step += *step_rel; /* these two updates leave init_step + nsteps unchanged */
 +    ir->nsteps    -= *step_rel;
 +    *step_rel      = 0;         /* the caller's relative step count starts over */
 +    wallcycle_start(wcycle, ewcRUN);
 +    runtime_start(runtime);
 +    print_date_and_time(fplog, cr->nodeid, "Restarted time", runtime);
 +}
 +
 +double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[],
 +             const output_env_t oenv, gmx_bool bVerbose, gmx_bool bCompact,
 +             int nstglobalcomm,
 +             gmx_vsite_t *vsite, gmx_constr_t constr,
 +             int stepout, t_inputrec *ir,
 +             gmx_mtop_t *top_global,
 +             t_fcdata *fcd,
 +             t_state *state_global,
 +             t_mdatoms *mdatoms,
 +             t_nrnb *nrnb, gmx_wallcycle_t wcycle,
 +             gmx_edsam_t ed, t_forcerec *fr,
 +             int repl_ex_nst, int repl_ex_nex, int repl_ex_seed, gmx_membed_t membed,
 +             real cpt_period, real max_hours,
 +             const char *deviceOptions,
 +             unsigned long Flags,
 +             gmx_runtime_t *runtime)
 +{
 +    gmx_mdoutf_t   *outf;
 +    gmx_large_int_t step, step_rel;
 +    double          run_time;
 +    double          t, t0, lam0[efptNR];
 +    gmx_bool        bGStatEveryStep, bGStat, bCalcVir, bCalcEner;
 +    gmx_bool        bNS, bNStList, bSimAnn, bStopCM, bRerunMD, bNotLastFrame = FALSE,
 +                    bFirstStep, bStateFromCP, bStateFromTPX, bInitStep, bLastStep,
 +                    bBornRadii, bStartingFromCpt;
 +    gmx_bool          bDoDHDL = FALSE, bDoFEP = FALSE, bDoExpanded = FALSE;
 +    gmx_bool          do_ene, do_log, do_verbose, bRerunWarnNoV = TRUE,
 +                      bForceUpdate = FALSE, bCPT;
 +    int               mdof_flags;
 +    gmx_bool          bMasterState;
 +    int               force_flags, cglo_flags;
 +    tensor            force_vir, shake_vir, total_vir, tmp_vir, pres;
 +    int               i, m;
 +    t_trxstatus      *status;
 +    rvec              mu_tot;
 +    t_vcm            *vcm;
 +    t_state          *bufstate = NULL;
 +    matrix           *scale_tot, pcoupl_mu, M, ebox;
 +    gmx_nlheur_t      nlh;
 +    t_trxframe        rerun_fr;
 +    gmx_repl_ex_t     repl_ex = NULL;
 +    int               nchkpt  = 1;
 +    gmx_localtop_t   *top;
 +    t_mdebin         *mdebin = NULL;
 +    df_history_t      df_history;
 +    t_state          *state    = NULL;
 +    rvec             *f_global = NULL;
 +    int               n_xtc    = -1;
 +    rvec             *x_xtc    = NULL;
 +    gmx_enerdata_t   *enerd;
 +    rvec             *f = NULL;
 +    gmx_global_stat_t gstat;
 +    gmx_update_t      upd   = NULL;
 +    t_graph          *graph = NULL;
 +    globsig_t         gs;
 +    gmx_rng_t         mcrng = NULL;
 +    gmx_bool          bFFscan;
 +    gmx_groups_t     *groups;
 +    gmx_ekindata_t   *ekind, *ekind_save;
 +    gmx_shellfc_t     shellfc;
 +    int               count, nconverged = 0;
 +    real              timestep = 0;
 +    double            tcount   = 0;
 +    gmx_bool          bIonize  = FALSE;
 +    gmx_bool          bTCR     = FALSE, bConverged = TRUE, bOK, bSumEkinhOld, bExchanged;
 +    gmx_bool          bAppend;
 +    gmx_bool          bResetCountersHalfMaxH = FALSE;
 +    gmx_bool          bVV, bIterativeCase, bFirstIterate, bTemp, bPres, bTrotter;
 +    gmx_bool          bUpdateDoLR;
 +    real              mu_aver = 0, dvdl;
 +    int               a0, a1, gnx = 0, ii;
 +    atom_id          *grpindex = NULL;
 +    char             *grpname;
 +    t_coupl_rec      *tcr     = NULL;
 +    rvec             *xcopy   = NULL, *vcopy = NULL, *cbuf = NULL;
 +    matrix            boxcopy = {{0}}, lastbox;
 +    tensor            tmpvir;
 +    real              fom, oldfom, veta_save, pcurr, scalevir, tracevir;
 +    real              vetanew = 0;
 +    int               lamnew  = 0;
 +    /* for FEP */
 +    int               nstfep;
 +    real              rate;
 +    double            cycles;
 +    real              saved_conserved_quantity = 0;
 +    real              last_ekin                = 0;
 +    int               iter_i;
 +    t_extmass         MassQ;
 +    int             **trotter_seq;
 +    char              sbuf[STEPSTRSIZE], sbuf2[STEPSTRSIZE];
 +    int               handled_stop_condition = gmx_stop_cond_none; /* compare to get_stop_condition*/
 +    gmx_iterate_t     iterate;
 +    gmx_large_int_t   multisim_nsteps = -1;                        /* number of steps to do  before first multisim
 +                                                                      simulation stops. If equal to zero, don't
 +                                                                      communicate any more between multisims.*/
 +    /* PME load balancing data for GPU kernels */
 +    pme_load_balancing_t pme_loadbal = NULL;
 +    double               cycles_pmes;
 +    gmx_bool             bPMETuneTry = FALSE, bPMETuneRunning = FALSE;
 +
 +#ifdef GMX_FAHCORE
 +    /* Temporary addition for FAHCORE checkpointing */
 +    int chkpt_ret;
 +#endif
 +
 +    /* Check for special mdrun options */
 +    bRerunMD = (Flags & MD_RERUN);
 +    bIonize  = (Flags & MD_IONIZE);
 +    bFFscan  = (Flags & MD_FFSCAN);
 +    bAppend  = (Flags & MD_APPENDFILES);
 +    if (Flags & MD_RESETCOUNTERSHALFWAY)
 +    {
 +        if (ir->nsteps > 0)
 +        {
 +            /* Signal to reset the counters half the simulation steps. */
 +            wcycle_set_reset_counters(wcycle, ir->nsteps/2);
 +        }
 +        /* Signal to reset the counters halfway the simulation time. */
 +        bResetCountersHalfMaxH = (max_hours > 0);
 +    }
 +
 +    /* md-vv uses averaged full step velocities for T-control
 +       md-vv-avek uses averaged half step velocities for T-control (but full step ekin for P control)
 +       md uses averaged half step kinetic energies to determine temperature unless defined otherwise by GMX_EKIN_AVE_VEL; */
 +    bVV = EI_VV(ir->eI);
 +    if (bVV) /* to store the initial velocities while computing virial */
 +    {
 +        snew(cbuf, top_global->natoms);
 +    }
 +    /* all the iteratative cases - only if there are constraints */
 +    bIterativeCase = ((IR_NPH_TROTTER(ir) || IR_NPT_TROTTER(ir)) && (constr) && (!bRerunMD));
 +    gmx_iterate_init(&iterate, FALSE); /* The default value of iterate->bIterationActive is set to
 +                                          false in this step.  The correct value, true or false,
 +                                          is set at each step, as it depends on the frequency of temperature
 +                                          and pressure control.*/
 +    bTrotter = (bVV && (IR_NPT_TROTTER(ir) || IR_NPH_TROTTER(ir) || IR_NVT_TROTTER(ir)));
 +
 +    if (bRerunMD)
 +    {
 +        /* Since we don't know if the frames read are related in any way,
 +         * rebuild the neighborlist at every step.
 +         */
 +        ir->nstlist       = 1;
 +        ir->nstcalcenergy = 1;
 +        nstglobalcomm     = 1;
 +    }
 +
 +    check_ir_old_tpx_versions(cr, fplog, ir, top_global);
 +
 +    nstglobalcomm   = check_nstglobalcomm(fplog, cr, nstglobalcomm, ir);
 +    bGStatEveryStep = (nstglobalcomm == 1);
 +
 +    if (!bGStatEveryStep && ir->nstlist == -1 && fplog != NULL)
 +    {
 +        fprintf(fplog,
 +                "To reduce the energy communication with nstlist = -1\n"
 +                "the neighbor list validity should not be checked at every step,\n"
 +                "this means that exact integration is not guaranteed.\n"
 +                "The neighbor list validity is checked after:\n"
 +                "  <n.list life time> - 2*std.dev.(n.list life time)  steps.\n"
 +                "In most cases this will result in exact integration.\n"
 +                "This reduces the energy communication by a factor of 2 to 3.\n"
 +                "If you want less energy communication, set nstlist > 3.\n\n");
 +    }
 +
 +    if (bRerunMD || bFFscan)
 +    {
 +        ir->nstxtcout = 0;
 +    }
 +    groups = &top_global->groups;
 +
 +    /* Initial values */
 +    init_md(fplog, cr, ir, oenv, &t, &t0, state_global->lambda,
 +            &(state_global->fep_state), lam0,
 +            nrnb, top_global, &upd,
 +            nfile, fnm, &outf, &mdebin,
 +            force_vir, shake_vir, mu_tot, &bSimAnn, &vcm, state_global, Flags);
 +
 +    clear_mat(total_vir);
 +    clear_mat(pres);
 +    /* Energy terms and groups */
 +    snew(enerd, 1);
 +    init_enerdata(top_global->groups.grps[egcENER].nr, ir->fepvals->n_lambda,
 +                  enerd);
 +    if (DOMAINDECOMP(cr))
 +    {
 +        f = NULL;
 +    }
 +    else
 +    {
 +        snew(f, top_global->natoms);
 +    }
 +
 +    /* lambda Monte carlo random number generator  */
 +    if (ir->bExpanded)
 +    {
 +        mcrng = gmx_rng_init(ir->expandedvals->lmc_seed);
 +    }
 +    /* copy the state into df_history */
 +    copy_df_history(&df_history, &state_global->dfhist);
 +
 +    /* Kinetic energy data */
 +    snew(ekind, 1);
 +    init_ekindata(fplog, top_global, &(ir->opts), ekind);
 +    /* needed for iteration of constraints */
 +    snew(ekind_save, 1);
 +    init_ekindata(fplog, top_global, &(ir->opts), ekind_save);
 +    /* Copy the cos acceleration to the groups struct */
 +    ekind->cosacc.cos_accel = ir->cos_accel;
 +
 +    gstat = global_stat_init(ir);
 +    debug_gmx();
 +
 +    /* Check for polarizable models and flexible constraints */
 +    shellfc = init_shell_flexcon(fplog,
 +                                 top_global, n_flexible_constraints(constr),
 +                                 (ir->bContinuation ||
 +                                  (DOMAINDECOMP(cr) && !MASTER(cr))) ?
 +                                 NULL : state_global->x);
 +
 +    if (DEFORM(*ir))
 +    {
 +#ifdef GMX_THREAD_MPI
 +        tMPI_Thread_mutex_lock(&deform_init_box_mutex);
 +#endif
 +        set_deform_reference_box(upd,
 +                                 deform_init_init_step_tpx,
 +                                 deform_init_box_tpx);
 +#ifdef GMX_THREAD_MPI
 +        tMPI_Thread_mutex_unlock(&deform_init_box_mutex);
 +#endif
 +    }
 +
 +    {
 +        double io = compute_io(ir, top_global->natoms, groups, mdebin->ebin->nener, 1);
 +        if ((io > 2000) && MASTER(cr))
 +        {
 +            fprintf(stderr,
 +                    "\nWARNING: This run will generate roughly %.0f Mb of data\n\n",
 +                    io);
 +        }
 +    }
 +
 +    if (DOMAINDECOMP(cr))
 +    {
 +        top = dd_init_local_top(top_global);
 +
 +        snew(state, 1);
 +        dd_init_local_state(cr->dd, state_global, state);
 +
 +        if (DDMASTER(cr->dd) && ir->nstfout)
 +        {
 +            snew(f_global, state_global->natoms);
 +        }
 +    }
 +    else
 +    {
 +        if (PAR(cr))
 +        {
 +            /* Initialize the particle decomposition and split the topology */
 +            top = split_system(fplog, top_global, ir, cr);
 +
 +            pd_cg_range(cr, &fr->cg0, &fr->hcg);
 +            pd_at_range(cr, &a0, &a1);
 +        }
 +        else
 +        {
 +            top = gmx_mtop_generate_local_top(top_global, ir);
 +
 +            a0 = 0;
 +            a1 = top_global->natoms;
 +        }
 +
 +        forcerec_set_excl_load(fr, top, cr);
 +
 +        state    = partdec_init_local_state(cr, state_global);
 +        f_global = f;
 +
 +        atoms2md(top_global, ir, 0, NULL, a0, a1-a0, mdatoms);
 +
 +        if (vsite)
 +        {
 +            set_vsite_top(vsite, top, mdatoms, cr);
 +        }
 +
 +        if (ir->ePBC != epbcNONE && !fr->bMolPBC)
 +        {
 +            graph = mk_graph(fplog, &(top->idef), 0, top_global->natoms, FALSE, FALSE);
 +        }
 +
 +        if (shellfc)
 +        {
 +            make_local_shells(cr, mdatoms, shellfc);
 +        }
 +
 +        init_bonded_thread_force_reduction(fr, &top->idef);
 +
 +        if (ir->pull && PAR(cr))
 +        {
 +            dd_make_local_pull_groups(NULL, ir->pull, mdatoms);
 +        }
 +    }
 +
 +    if (DOMAINDECOMP(cr))
 +    {
 +        /* Distribute the charge groups over the nodes from the master node */
 +        dd_partition_system(fplog, ir->init_step, cr, TRUE, 1,
 +                            state_global, top_global, ir,
 +                            state, &f, mdatoms, top, fr,
 +                            vsite, shellfc, constr,
 +                            nrnb, wcycle, FALSE);
 +
 +    }
 +
 +    update_mdatoms(mdatoms, state->lambda[efptMASS]);
 +
 +    if (opt2bSet("-cpi", nfile, fnm))
 +    {
 +        bStateFromCP = gmx_fexist_master(opt2fn_master("-cpi", nfile, fnm, cr), cr);
 +    }
 +    else
 +    {
 +        bStateFromCP = FALSE;
 +    }
 +
 +    if (MASTER(cr))
 +    {
 +        if (bStateFromCP)
 +        {
 +            /* Update mdebin with energy history if appending to output files */
 +            if (Flags & MD_APPENDFILES)
 +            {
 +                restore_energyhistory_from_state(mdebin, &state_global->enerhist);
 +            }
 +            else
 +            {
 +                /* We might have read an energy history from checkpoint,
 +                 * free the allocated memory and reset the counts.
 +                 */
 +                done_energyhistory(&state_global->enerhist);
 +                init_energyhistory(&state_global->enerhist);
 +            }
 +        }
 +        /* Set the initial energy history in state by updating once */
 +        update_energyhistory(&state_global->enerhist, mdebin);
 +    }
 +
 +    if ((state->flags & (1<<estLD_RNG)) && (Flags & MD_READ_RNG))
 +    {
 +        /* Set the random state if we read a checkpoint file */
 +        set_stochd_state(upd, state);
 +    }
 +
 +    if (state->flags & (1<<estMC_RNG))
 +    {
 +        set_mc_state(mcrng, state);
 +    }
 +
 +    /* Initialize constraints */
 +    if (constr)
 +    {
 +        if (!DOMAINDECOMP(cr))
 +        {
 +            set_constraints(constr, top, ir, mdatoms, cr);
 +        }
 +    }
 +
 +    /* Check whether we have to GCT stuff */
 +    bTCR = ftp2bSet(efGCT, nfile, fnm);
 +    if (bTCR)
 +    {
 +        if (MASTER(cr))
 +        {
 +            fprintf(stderr, "Will do General Coupling Theory!\n");
 +        }
 +        gnx = top_global->mols.nr;
 +        snew(grpindex, gnx);
 +        for (i = 0; (i < gnx); i++)
 +        {
 +            grpindex[i] = i;
 +        }
 +    }
 +
 +    if (repl_ex_nst > 0)
 +    {
 +        /* We need to be sure replica exchange can only occur
 +         * when the energies are current */
 +        check_nst_param(fplog, cr, "nstcalcenergy", ir->nstcalcenergy,
 +                        "repl_ex_nst", &repl_ex_nst);
 +        /* This check needs to happen before inter-simulation
 +         * signals are initialized, too */
 +    }
 +    if (repl_ex_nst > 0 && MASTER(cr))
 +    {
 +        repl_ex = init_replica_exchange(fplog, cr->ms, state_global, ir,
 +                                        repl_ex_nst, repl_ex_nex, repl_ex_seed);
 +    }
 +
-                   do_per_step(step, nstglobalcomm) ||
++    /* PME tuning is only supported with GPUs or PME nodes and not with rerun.
++     * With perturbed charges with soft-core we should not change the cut-off.
++     */
 +    if ((Flags & MD_TUNEPME) &&
 +        EEL_PME(fr->eeltype) &&
 +        ( (fr->cutoff_scheme == ecutsVERLET && fr->nbv->bUseGPU) || !(cr->duty & DUTY_PME)) &&
++        !(ir->efep != efepNO && mdatoms->nChargePerturbed > 0 && ir->fepvals->bScCoul) &&
 +        !bRerunMD)
 +    {
 +        pme_loadbal_init(&pme_loadbal, ir, state->box, fr->ic, fr->pmedata);
 +        cycles_pmes = 0;
 +        if (cr->duty & DUTY_PME)
 +        {
 +            /* Start tuning right away, as we can't measure the load */
 +            bPMETuneRunning = TRUE;
 +        }
 +        else
 +        {
 +            /* Separate PME nodes, we can measure the PP/PME load balance */
 +            bPMETuneTry = TRUE;
 +        }
 +    }
 +
 +    if (!ir->bContinuation && !bRerunMD)
 +    {
 +        if (mdatoms->cFREEZE && (state->flags & (1<<estV)))
 +        {
 +            /* Set the velocities of frozen particles to zero */
 +            for (i = mdatoms->start; i < mdatoms->start+mdatoms->homenr; i++)
 +            {
 +                for (m = 0; m < DIM; m++)
 +                {
 +                    if (ir->opts.nFreeze[mdatoms->cFREEZE[i]][m])
 +                    {
 +                        state->v[i][m] = 0;
 +                    }
 +                }
 +            }
 +        }
 +
 +        if (constr)
 +        {
 +            /* Constrain the initial coordinates and velocities */
 +            do_constrain_first(fplog, constr, ir, mdatoms, state, f,
 +                               graph, cr, nrnb, fr, top, shake_vir);
 +        }
 +        if (vsite)
 +        {
 +            /* Construct the virtual sites for the initial configuration */
 +            construct_vsites(fplog, vsite, state->x, nrnb, ir->delta_t, NULL,
 +                             top->idef.iparams, top->idef.il,
 +                             fr->ePBC, fr->bMolPBC, graph, cr, state->box);
 +        }
 +    }
 +
 +    debug_gmx();
 +
 +    /* set free energy calculation frequency as the minimum of nstdhdl, nstexpanded, and nstrepl_ex_nst*/
 +    nstfep = ir->fepvals->nstdhdl;
 +    if (ir->bExpanded && (nstfep > ir->expandedvals->nstexpanded))
 +    {
 +        nstfep = ir->expandedvals->nstexpanded;
 +    }
 +    if (repl_ex_nst > 0 && nstfep > repl_ex_nst)
 +    {
 +        nstfep = repl_ex_nst;
 +    }
 +
 +    /* I'm assuming we need global communication the first time! MRS */
 +    cglo_flags = (CGLO_TEMPERATURE | CGLO_GSTAT
 +                  | ((ir->comm_mode != ecmNO) ? CGLO_STOPCM : 0)
 +                  | (bVV ? CGLO_PRESSURE : 0)
 +                  | (bVV ? CGLO_CONSTRAINT : 0)
 +                  | (bRerunMD ? CGLO_RERUNMD : 0)
 +                  | ((Flags & MD_READ_EKIN) ? CGLO_READEKIN : 0));
 +
 +    bSumEkinhOld = FALSE;
 +    compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm,
 +                    NULL, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
 +                    constr, NULL, FALSE, state->box,
 +                    top_global, &pcurr, top_global->natoms, &bSumEkinhOld, cglo_flags);
 +    if (ir->eI == eiVVAK)
 +    {
 +        /* a second call to get the half step temperature initialized as well */
 +        /* we do the same call as above, but turn the pressure off -- internally to
 +           compute_globals, this is recognized as a velocity verlet half-step
 +           kinetic energy calculation.  This minimized excess variables, but
 +           perhaps loses some logic?*/
 +
 +        compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm,
 +                        NULL, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
 +                        constr, NULL, FALSE, state->box,
 +                        top_global, &pcurr, top_global->natoms, &bSumEkinhOld,
 +                        cglo_flags &~(CGLO_STOPCM | CGLO_PRESSURE));
 +    }
 +
 +    /* Calculate the initial half step temperature, and save the ekinh_old */
 +    if (!(Flags & MD_STARTFROMCPT))
 +    {
 +        for (i = 0; (i < ir->opts.ngtc); i++)
 +        {
 +            copy_mat(ekind->tcstat[i].ekinh, ekind->tcstat[i].ekinh_old);
 +        }
 +    }
 +    if (ir->eI != eiVV)
 +    {
 +        enerd->term[F_TEMP] *= 2; /* result of averages being done over previous and current step,
 +                                     and there is no previous step */
 +    }
 +
 +    /* if using an iterative algorithm, we need to create a working directory for the state. */
 +    if (bIterativeCase)
 +    {
 +        bufstate = init_bufstate(state);
 +    }
 +    if (bFFscan)
 +    {
 +        snew(xcopy, state->natoms);
 +        snew(vcopy, state->natoms);
 +        copy_rvecn(state->x, xcopy, 0, state->natoms);
 +        copy_rvecn(state->v, vcopy, 0, state->natoms);
 +        copy_mat(state->box, boxcopy);
 +    }
 +
 +    /* need to make an initiation call to get the Trotter variables set, as well as other constants for non-trotter
 +       temperature control */
 +    trotter_seq = init_npt_vars(ir, state, &MassQ, bTrotter);
 +
 +    if (MASTER(cr))
 +    {
 +        if (constr && !ir->bContinuation && ir->eConstrAlg == econtLINCS)
 +        {
 +            fprintf(fplog,
 +                    "RMS relative constraint deviation after constraining: %.2e\n",
 +                    constr_rmsd(constr, FALSE));
 +        }
 +        if (EI_STATE_VELOCITY(ir->eI))
 +        {
 +            fprintf(fplog, "Initial temperature: %g K\n", enerd->term[F_TEMP]);
 +        }
 +        if (bRerunMD)
 +        {
 +            fprintf(stderr, "starting md rerun '%s', reading coordinates from"
 +                    " input trajectory '%s'\n\n",
 +                    *(top_global->name), opt2fn("-rerun", nfile, fnm));
 +            if (bVerbose)
 +            {
 +                fprintf(stderr, "Calculated time to finish depends on nsteps from "
 +                        "run input file,\nwhich may not correspond to the time "
 +                        "needed to process input trajectory.\n\n");
 +            }
 +        }
 +        else
 +        {
 +            char tbuf[20];
 +            fprintf(stderr, "starting mdrun '%s'\n",
 +                    *(top_global->name));
 +            if (ir->nsteps >= 0)
 +            {
 +                sprintf(tbuf, "%8.1f", (ir->init_step+ir->nsteps)*ir->delta_t);
 +            }
 +            else
 +            {
 +                sprintf(tbuf, "%s", "infinite");
 +            }
 +            if (ir->init_step > 0)
 +            {
 +                fprintf(stderr, "%s steps, %s ps (continuing from step %s, %8.1f ps).\n",
 +                        gmx_step_str(ir->init_step+ir->nsteps, sbuf), tbuf,
 +                        gmx_step_str(ir->init_step, sbuf2),
 +                        ir->init_step*ir->delta_t);
 +            }
 +            else
 +            {
 +                fprintf(stderr, "%s steps, %s ps.\n",
 +                        gmx_step_str(ir->nsteps, sbuf), tbuf);
 +            }
 +        }
 +        fprintf(fplog, "\n");
 +    }
 +
 +    /* Set and write start time */
 +    runtime_start(runtime);
 +    print_date_and_time(fplog, cr->nodeid, "Started mdrun", runtime);
 +    wallcycle_start(wcycle, ewcRUN);
 +    if (fplog)
 +    {
 +        fprintf(fplog, "\n");
 +    }
 +
 +    /* safest point to do file checkpointing is here.  More general point would be immediately before integrator call */
 +#ifdef GMX_FAHCORE
 +    chkpt_ret = fcCheckPointParallel( cr->nodeid,
 +                                      NULL, 0);
 +    if (chkpt_ret == 0)
 +    {
 +        gmx_fatal( 3, __FILE__, __LINE__, "Checkpoint error on step %d\n", 0 );
 +    }
 +#endif
 +
 +    debug_gmx();
 +    /***********************************************************
 +     *
 +     *             Loop over MD steps
 +     *
 +     ************************************************************/
 +
 +    /* if rerunMD then read coordinates and velocities from input trajectory */
 +    if (bRerunMD)
 +    {
 +        if (getenv("GMX_FORCE_UPDATE"))
 +        {
 +            bForceUpdate = TRUE;
 +        }
 +
 +        rerun_fr.natoms = 0;
 +        if (MASTER(cr))
 +        {
 +            bNotLastFrame = read_first_frame(oenv, &status,
 +                                             opt2fn("-rerun", nfile, fnm),
 +                                             &rerun_fr, TRX_NEED_X | TRX_READ_V);
 +            if (rerun_fr.natoms != top_global->natoms)
 +            {
 +                gmx_fatal(FARGS,
 +                          "Number of atoms in trajectory (%d) does not match the "
 +                          "run input file (%d)\n",
 +                          rerun_fr.natoms, top_global->natoms);
 +            }
 +            if (ir->ePBC != epbcNONE)
 +            {
 +                if (!rerun_fr.bBox)
 +                {
 +                    gmx_fatal(FARGS, "Rerun trajectory frame step %d time %f does not contain a box, while pbc is used", rerun_fr.step, rerun_fr.time);
 +                }
 +                if (max_cutoff2(ir->ePBC, rerun_fr.box) < sqr(fr->rlistlong))
 +                {
 +                    gmx_fatal(FARGS, "Rerun trajectory frame step %d time %f has too small box dimensions", rerun_fr.step, rerun_fr.time);
 +                }
 +            }
 +        }
 +
 +        if (PAR(cr))
 +        {
 +            rerun_parallel_comm(cr, &rerun_fr, &bNotLastFrame);
 +        }
 +
 +        if (ir->ePBC != epbcNONE)
 +        {
 +            /* Set the shift vectors.
 +             * Necessary here when have a static box different from the tpr box.
 +             */
 +            calc_shifts(rerun_fr.box, fr->shift_vec);
 +        }
 +    }
 +
 +    /* loop over MD steps or if rerunMD to end of input trajectory */
 +    bFirstStep = TRUE;
 +    /* Skip the first Nose-Hoover integration when we get the state from tpx */
 +    bStateFromTPX    = !bStateFromCP;
 +    bInitStep        = bFirstStep && (bStateFromTPX || bVV);
 +    bStartingFromCpt = (Flags & MD_STARTFROMCPT) && bInitStep;
 +    bLastStep        = FALSE;
 +    bSumEkinhOld     = FALSE;
 +    bExchanged       = FALSE;
 +
 +    init_global_signals(&gs, cr, ir, repl_ex_nst);
 +
 +    step     = ir->init_step;
 +    step_rel = 0;
 +
 +    if (ir->nstlist == -1)
 +    {
 +        init_nlistheuristics(&nlh, bGStatEveryStep, step);
 +    }
 +
 +    if (MULTISIM(cr) && (repl_ex_nst <= 0 ))
 +    {
 +        /* check how many steps are left in other sims */
 +        multisim_nsteps = get_multisim_nsteps(cr, ir->nsteps);
 +    }
 +
 +
 +    /* and stop now if we should */
 +    bLastStep = (bRerunMD || (ir->nsteps >= 0 && step_rel > ir->nsteps) ||
 +                 ((multisim_nsteps >= 0) && (step_rel >= multisim_nsteps )));
 +    while (!bLastStep || (bRerunMD && bNotLastFrame))
 +    {
 +
 +        wallcycle_start(wcycle, ewcSTEP);
 +
 +        if (bRerunMD)
 +        {
 +            if (rerun_fr.bStep)
 +            {
 +                step     = rerun_fr.step;
 +                step_rel = step - ir->init_step;
 +            }
 +            if (rerun_fr.bTime)
 +            {
 +                t = rerun_fr.time;
 +            }
 +            else
 +            {
 +                t = step;
 +            }
 +        }
 +        else
 +        {
 +            bLastStep = (step_rel == ir->nsteps);
 +            t         = t0 + step*ir->delta_t;
 +        }
 +
 +        if (ir->efep != efepNO || ir->bSimTemp)
 +        {
 +            /* find and set the current lambdas.  If rerunning, we either read in a state, or a lambda value,
 +               requiring different logic. */
 +
 +            set_current_lambdas(step, ir->fepvals, bRerunMD, &rerun_fr, state_global, state, lam0);
 +            bDoDHDL      = do_per_step(step, ir->fepvals->nstdhdl);
 +            bDoFEP       = (do_per_step(step, nstfep) && (ir->efep != efepNO));
 +            bDoExpanded  = (do_per_step(step, ir->expandedvals->nstexpanded) && (ir->bExpanded) && (step > 0));
 +        }
 +
 +        if (bSimAnn)
 +        {
 +            update_annealing_target_temp(&(ir->opts), t);
 +        }
 +
 +        if (bRerunMD)
 +        {
 +            if (!(DOMAINDECOMP(cr) && !MASTER(cr)))
 +            {
 +                for (i = 0; i < state_global->natoms; i++)
 +                {
 +                    copy_rvec(rerun_fr.x[i], state_global->x[i]);
 +                }
 +                if (rerun_fr.bV)
 +                {
 +                    for (i = 0; i < state_global->natoms; i++)
 +                    {
 +                        copy_rvec(rerun_fr.v[i], state_global->v[i]);
 +                    }
 +                }
 +                else
 +                {
 +                    for (i = 0; i < state_global->natoms; i++)
 +                    {
 +                        clear_rvec(state_global->v[i]);
 +                    }
 +                    if (bRerunWarnNoV)
 +                    {
 +                        fprintf(stderr, "\nWARNING: Some frames do not contain velocities.\n"
 +                                "         Ekin, temperature and pressure are incorrect,\n"
 +                                "         the virial will be incorrect when constraints are present.\n"
 +                                "\n");
 +                        bRerunWarnNoV = FALSE;
 +                    }
 +                }
 +            }
 +            copy_mat(rerun_fr.box, state_global->box);
 +            copy_mat(state_global->box, state->box);
 +
 +            if (vsite && (Flags & MD_RERUN_VSITE))
 +            {
 +                if (DOMAINDECOMP(cr))
 +                {
 +                    gmx_fatal(FARGS, "Vsite recalculation with -rerun is not implemented for domain decomposition, use particle decomposition");
 +                }
 +                if (graph)
 +                {
 +                    /* Following is necessary because the graph may get out of sync
 +                     * with the coordinates if we only have every N'th coordinate set
 +                     */
 +                    mk_mshift(fplog, graph, fr->ePBC, state->box, state->x);
 +                    shift_self(graph, state->box, state->x);
 +                }
 +                construct_vsites(fplog, vsite, state->x, nrnb, ir->delta_t, state->v,
 +                                 top->idef.iparams, top->idef.il,
 +                                 fr->ePBC, fr->bMolPBC, graph, cr, state->box);
 +                if (graph)
 +                {
 +                    unshift_self(graph, state->box, state->x);
 +                }
 +            }
 +        }
 +
 +        /* Stop Center of Mass motion */
 +        bStopCM = (ir->comm_mode != ecmNO && do_per_step(step, ir->nstcomm));
 +
 +        /* Copy back starting coordinates in case we're doing a forcefield scan */
 +        if (bFFscan)
 +        {
 +            for (ii = 0; (ii < state->natoms); ii++)
 +            {
 +                copy_rvec(xcopy[ii], state->x[ii]);
 +                copy_rvec(vcopy[ii], state->v[ii]);
 +            }
 +            copy_mat(boxcopy, state->box);
 +        }
 +
 +        if (bRerunMD)
 +        {
 +            /* for rerun MD always do Neighbour Searching */
 +            bNS      = (bFirstStep || ir->nstlist != 0);
 +            bNStList = bNS;
 +        }
 +        else
 +        {
 +            /* Determine whether or not to do Neighbour Searching and LR */
 +            bNStList = (ir->nstlist > 0  && step % ir->nstlist == 0);
 +
 +            bNS = (bFirstStep || bExchanged || bNStList || bDoFEP ||
 +                   (ir->nstlist == -1 && nlh.nabnsb > 0));
 +
 +            if (bNS && ir->nstlist == -1)
 +            {
 +                set_nlistheuristics(&nlh, bFirstStep || bExchanged || bDoFEP, step);
 +            }
 +        }
 +
 +        /* check whether we should stop because another simulation has
 +           stopped. */
 +        if (MULTISIM(cr))
 +        {
 +            if ( (multisim_nsteps >= 0) &&  (step_rel >= multisim_nsteps)  &&
 +                 (multisim_nsteps != ir->nsteps) )
 +            {
 +                if (bNS)
 +                {
 +                    if (MASTER(cr))
 +                    {
 +                        fprintf(stderr,
 +                                "Stopping simulation %d because another one has finished\n",
 +                                cr->ms->sim);
 +                    }
 +                    bLastStep         = TRUE;
 +                    gs.sig[eglsCHKPT] = 1;
 +                }
 +            }
 +        }
 +
 +        /* < 0 means stop at next step, > 0 means stop at next NS step */
 +        if ( (gs.set[eglsSTOPCOND] < 0 ) ||
 +             ( (gs.set[eglsSTOPCOND] > 0 ) && ( bNS || ir->nstlist == 0)) )
 +        {
 +            bLastStep = TRUE;
 +        }
 +
 +        /* Determine whether or not to update the Born radii if doing GB */
 +        bBornRadii = bFirstStep;
 +        if (ir->implicit_solvent && (step % ir->nstgbradii == 0))
 +        {
 +            bBornRadii = TRUE;
 +        }
 +
 +        do_log     = do_per_step(step, ir->nstlog) || bFirstStep || bLastStep;
 +        do_verbose = bVerbose &&
 +            (step % stepout == 0 || bFirstStep || bLastStep);
 +
 +        if (bNS && !(bFirstStep && ir->bContinuation && !bRerunMD))
 +        {
 +            if (bRerunMD)
 +            {
 +                bMasterState = TRUE;
 +            }
 +            else
 +            {
 +                bMasterState = FALSE;
 +                /* Correct the new box if it is too skewed */
 +                if (DYNAMIC_BOX(*ir))
 +                {
 +                    if (correct_box(fplog, step, state->box, graph))
 +                    {
 +                        bMasterState = TRUE;
 +                    }
 +                }
 +                if (DOMAINDECOMP(cr) && bMasterState)
 +                {
 +                    dd_collect_state(cr->dd, state, state_global);
 +                }
 +            }
 +
 +            if (DOMAINDECOMP(cr))
 +            {
 +                /* Repartition the domain decomposition */
 +                wallcycle_start(wcycle, ewcDOMDEC);
 +                dd_partition_system(fplog, step, cr,
 +                                    bMasterState, nstglobalcomm,
 +                                    state_global, top_global, ir,
 +                                    state, &f, mdatoms, top, fr,
 +                                    vsite, shellfc, constr,
 +                                    nrnb, wcycle,
 +                                    do_verbose && !bPMETuneRunning);
 +                wallcycle_stop(wcycle, ewcDOMDEC);
 +                /* If using an iterative integrator, reallocate space to match the decomposition */
 +            }
 +        }
 +
 +        if (MASTER(cr) && do_log && !bFFscan)
 +        {
 +            print_ebin_header(fplog, step, t, state->lambda[efptFEP]); /* can we improve the information printed here? */
 +        }
 +
 +        if (ir->efep != efepNO)
 +        {
 +            update_mdatoms(mdatoms, state->lambda[efptMASS]);
 +        }
 +
 +        if ((bRerunMD && rerun_fr.bV) || bExchanged)
 +        {
 +
 +            /* We need the kinetic energy at minus the half step for determining
 +             * the full step kinetic energy and possibly for T-coupling.*/
 +            /* This may not be quite working correctly yet . . . . */
 +            compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm,
 +                            wcycle, enerd, NULL, NULL, NULL, NULL, mu_tot,
 +                            constr, NULL, FALSE, state->box,
 +                            top_global, &pcurr, top_global->natoms, &bSumEkinhOld,
 +                            CGLO_RERUNMD | CGLO_GSTAT | CGLO_TEMPERATURE);
 +        }
 +        clear_mat(force_vir);
 +
 +        /* Ionize the atoms if necessary */
 +        if (bIonize)
 +        {
 +            ionize(fplog, oenv, mdatoms, top_global, t, ir, state->x, state->v,
 +                   mdatoms->start, mdatoms->start+mdatoms->homenr, state->box, cr);
 +        }
 +
 +        /* Update force field in ffscan program */
 +        if (bFFscan)
 +        {
 +            if (update_forcefield(fplog,
 +                                  nfile, fnm, fr,
 +                                  mdatoms->nr, state->x, state->box))
 +            {
 +                gmx_finalize_par();
 +
 +                exit(0);
 +            }
 +        }
 +
 +        /* We write a checkpoint at this MD step when:
 +         * either at an NS step when we signalled through gs,
 +         * or at the last step (but not when we do not want confout),
 +         * but never at the first step or with rerun.
 +         */
 +        bCPT = (((gs.set[eglsCHKPT] && (bNS || ir->nstlist == 0)) ||
 +                 (bLastStep && (Flags & MD_CONFOUT))) &&
 +                step > ir->init_step && !bRerunMD);
 +        if (bCPT)
 +        {
 +            gs.set[eglsCHKPT] = 0;
 +        }
 +
 +        /* Determine the energy and pressure:
 +         * at nstcalcenergy steps and at energy output steps (set below).
 +         */
 +        if (EI_VV(ir->eI) && (!bInitStep))
 +        {
 +            /* for vv, the first half of the integration actually corresponds
 +               to the previous step.  bCalcEner is only required to be evaluated on the 'next' step,
 +               but the virial needs to be calculated on both the current step and the 'next' step. Future
 +               reorganization may be able to get rid of one of the bCalcVir=TRUE steps. */
 +
 +            bCalcEner = do_per_step(step-1, ir->nstcalcenergy);
 +            bCalcVir  = bCalcEner ||
 +                (ir->epc != epcNO && (do_per_step(step, ir->nstpcouple) || do_per_step(step-1, ir->nstpcouple)));
 +        }
 +        else
 +        {
 +            bCalcEner = do_per_step(step, ir->nstcalcenergy);
 +            bCalcVir  = bCalcEner ||
 +                (ir->epc != epcNO && do_per_step(step, ir->nstpcouple));
 +        }
 +
 +        /* Do we need global communication ? */
 +        bGStat = (bCalcVir || bCalcEner || bStopCM ||
-         pme_loadbal_done(pme_loadbal, fplog);
++                  do_per_step(step, nstglobalcomm) || (bVV && IR_NVT_TROTTER(ir) && do_per_step(step-1, nstglobalcomm)) ||
 +                  (ir->nstlist == -1 && !bRerunMD && step >= nlh.step_nscheck));
 +
 +        do_ene = (do_per_step(step, ir->nstenergy) || bLastStep);
 +
 +        if (do_ene || do_log)
 +        {
 +            bCalcVir  = TRUE;
 +            bCalcEner = TRUE;
 +            bGStat    = TRUE;
 +        }
 +
 +        /* these CGLO_ options remain the same throughout the iteration */
 +        cglo_flags = ((bRerunMD ? CGLO_RERUNMD : 0) |
 +                      (bGStat ? CGLO_GSTAT : 0)
 +                      );
 +
 +        force_flags = (GMX_FORCE_STATECHANGED |
 +                       ((DYNAMIC_BOX(*ir) || bRerunMD) ? GMX_FORCE_DYNAMICBOX : 0) |
 +                       GMX_FORCE_ALLFORCES |
 +                       GMX_FORCE_SEPLRF |
 +                       (bCalcVir ? GMX_FORCE_VIRIAL : 0) |
 +                       (bCalcEner ? GMX_FORCE_ENERGY : 0) |
 +                       (bDoFEP ? GMX_FORCE_DHDL : 0)
 +                       );
 +
 +        if (fr->bTwinRange)
 +        {
 +            if (do_per_step(step, ir->nstcalclr))
 +            {
 +                force_flags |= GMX_FORCE_DO_LR;
 +            }
 +        }
 +
 +        if (shellfc)
 +        {
 +            /* Now is the time to relax the shells */
 +            count = relax_shell_flexcon(fplog, cr, bVerbose, bFFscan ? step+1 : step,
 +                                        ir, bNS, force_flags,
 +                                        bStopCM, top, top_global,
 +                                        constr, enerd, fcd,
 +                                        state, f, force_vir, mdatoms,
 +                                        nrnb, wcycle, graph, groups,
 +                                        shellfc, fr, bBornRadii, t, mu_tot,
 +                                        state->natoms, &bConverged, vsite,
 +                                        outf->fp_field);
 +            tcount += count;
 +
 +            if (bConverged)
 +            {
 +                nconverged++;
 +            }
 +        }
 +        else
 +        {
 +            /* The coordinates (x) are shifted (to get whole molecules)
 +             * in do_force.
 +             * This is parallelized as well, and does communication too.
 +             * Check comments in sim_util.c
 +             */
 +            do_force(fplog, cr, ir, step, nrnb, wcycle, top, top_global, groups,
 +                     state->box, state->x, &state->hist,
 +                     f, force_vir, mdatoms, enerd, fcd,
 +                     state->lambda, graph,
 +                     fr, vsite, mu_tot, t, outf->fp_field, ed, bBornRadii,
 +                     (bNS ? GMX_FORCE_NS : 0) | force_flags);
 +        }
 +
 +        if (bTCR)
 +        {
 +            mu_aver = calc_mu_aver(cr, state->x, mdatoms->chargeA,
 +                                   mu_tot, &top_global->mols, mdatoms, gnx, grpindex);
 +        }
 +
 +        if (bTCR && bFirstStep)
 +        {
 +            tcr = init_coupling(fplog, nfile, fnm, cr, fr, mdatoms, &(top->idef));
 +            fprintf(fplog, "Done init_coupling\n");
 +            fflush(fplog);
 +        }
 +
 +        if (bVV && !bStartingFromCpt && !bRerunMD)
 +        /*  ############### START FIRST UPDATE HALF-STEP FOR VV METHODS############### */
 +        {
 +            if (ir->eI == eiVV && bInitStep)
 +            {
 +                /* if using velocity verlet with full time step Ekin,
 +                 * take the first half step only to compute the
 +                 * virial for the first step. From there,
 +                 * revert back to the initial coordinates
 +                 * so that the input is actually the initial step.
 +                 */
 +                copy_rvecn(state->v, cbuf, 0, state->natoms); /* should make this better for parallelizing? */
 +            }
 +            else
 +            {
 +                /* this is for NHC in the Ekin(t+dt/2) version of vv */
 +                trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ1);
 +            }
 +
 +            /* If we are using twin-range interactions where the long-range component
 +             * is only evaluated every nstcalclr>1 steps, we should do a special update
 +             * step to combine the long-range forces on these steps.
 +             * For nstcalclr=1 this is not done, since the forces would have been added
 +             * directly to the short-range forces already.
 +             */
 +            bUpdateDoLR = (fr->bTwinRange && do_per_step(step, ir->nstcalclr));
 +
 +            update_coords(fplog, step, ir, mdatoms, state, fr->bMolPBC,
 +                          f, bUpdateDoLR, fr->f_twin, fcd,
 +                          ekind, M, wcycle, upd, bInitStep, etrtVELOCITY1,
 +                          cr, nrnb, constr, &top->idef);
 +
 +            if (bIterativeCase && do_per_step(step-1, ir->nstpcouple) && !bInitStep)
 +            {
 +                gmx_iterate_init(&iterate, TRUE);
 +            }
 +            /* for iterations, we save these vectors, as we will be self-consistently iterating
 +               the calculations */
 +
 +            /*#### UPDATE EXTENDED VARIABLES IN TROTTER FORMULATION */
 +
 +            /* save the state */
 +            if (iterate.bIterationActive)
 +            {
 +                copy_coupling_state(state, bufstate, ekind, ekind_save, &(ir->opts));
 +            }
 +
 +            bFirstIterate = TRUE;
 +            while (bFirstIterate || iterate.bIterationActive)
 +            {
 +                if (iterate.bIterationActive)
 +                {
 +                    copy_coupling_state(bufstate, state, ekind_save, ekind, &(ir->opts));
 +                    if (bFirstIterate && bTrotter)
 +                    {
 +                        /* The first time through, we need a decent first estimate
 +                           of veta(t+dt) to compute the constraints.  Do
 +                           this by computing the box volume part of the
 +                           trotter integration at this time. Nothing else
 +                           should be changed by this routine here.  If
 +                           !(first time), we start with the previous value
 +                           of veta.  */
 +
 +                        veta_save = state->veta;
 +                        trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ0);
 +                        vetanew     = state->veta;
 +                        state->veta = veta_save;
 +                    }
 +                }
 +
 +                bOK = TRUE;
 +                if (!bRerunMD || rerun_fr.bV || bForceUpdate)     /* Why is rerun_fr.bV here?  Unclear. */
 +                {
 +                    dvdl = 0;
 +
 +                    update_constraints(fplog, step, &dvdl, ir, ekind, mdatoms,
 +                                       state, fr->bMolPBC, graph, f,
 +                                       &top->idef, shake_vir, NULL,
 +                                       cr, nrnb, wcycle, upd, constr,
 +                                       bInitStep, TRUE, bCalcVir, vetanew);
 +
 +                    if (!bOK && !bFFscan)
 +                    {
 +                        gmx_fatal(FARGS, "Constraint error: Shake, Lincs or Settle could not solve the constrains");
 +                    }
 +
 +                }
 +                else if (graph)
 +                {
 +                    /* Need to unshift here if a do_force has been
 +                       called in the previous step */
 +                    unshift_self(graph, state->box, state->x);
 +                }
 +
 +                /* if VV, compute the pressure and constraints */
 +                /* For VV2, we strictly only need this if using pressure
 +                 * control, but we really would like to have accurate pressures
 +                 * printed out.
 +                 * Think about ways around this in the future?
 +                 * For now, keep this choice in comments.
 +                 */
 +                /*bPres = (ir->eI==eiVV || IR_NPT_TROTTER(ir)); */
 +                /*bTemp = ((ir->eI==eiVV &&(!bInitStep)) || (ir->eI==eiVVAK && IR_NPT_TROTTER(ir)));*/
 +                bPres = TRUE;
 +                bTemp = ((ir->eI == eiVV && (!bInitStep)) || (ir->eI == eiVVAK));
 +                if (bCalcEner && ir->eI == eiVVAK)  /*MRS:  7/9/2010 -- this still doesn't fix it?*/
 +                {
 +                    bSumEkinhOld = TRUE;
 +                }
 +                /* for vv, the first half of the integration actually corresponds to the previous step.
 +                   So we need information from the last step in the first half of the integration */
 +                if (bGStat || do_per_step(step-1, nstglobalcomm))
 +                {
 +                    compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm,
 +                                    wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
 +                                    constr, NULL, FALSE, state->box,
 +                                    top_global, &pcurr, top_global->natoms, &bSumEkinhOld,
 +                                    cglo_flags
 +                                    | CGLO_ENERGY
 +                                    | (bTemp ? CGLO_TEMPERATURE : 0)
 +                                    | (bPres ? CGLO_PRESSURE : 0)
 +                                    | (bPres ? CGLO_CONSTRAINT : 0)
 +                                    | ((iterate.bIterationActive) ? CGLO_ITERATE : 0)
 +                                    | (bFirstIterate ? CGLO_FIRSTITERATE : 0)
 +                                    | CGLO_SCALEEKIN
 +                                    );
 +                    /* explanation of above:
 +                       a) We compute Ekin at the full time step
 +                       if 1) we are using the AveVel Ekin, and it's not the
 +                       initial step, or 2) if we are using AveEkin, but need the full
 +                       time step kinetic energy for the pressure (always true now, since we want accurate statistics).
 +                       b) If we are using EkinAveEkin for the kinetic energy for the temperature control, we still feed in
 +                       EkinAveVel because it's needed for the pressure */
 +                }
 +                /* temperature scaling and pressure scaling to produce the extended variables at t+dt */
 +                if (!bInitStep)
 +                {
 +                    if (bTrotter)
 +                    {
 +                        m_add(force_vir, shake_vir, total_vir); /* we need the un-dispersion corrected total vir here */
 +                        trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ2);
 +                    }
 +                    else
 +                    {
 +                        if (bExchanged)
 +                        {
 +
 +                            /* We need the kinetic energy at minus the half step for determining
 +                             * the full step kinetic energy and possibly for T-coupling.*/
 +                            /* This may not be quite working correctly yet . . . . */
 +                            compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm,
 +                                            wcycle, enerd, NULL, NULL, NULL, NULL, mu_tot,
 +                                            constr, NULL, FALSE, state->box,
 +                                            top_global, &pcurr, top_global->natoms, &bSumEkinhOld,
 +                                            CGLO_RERUNMD | CGLO_GSTAT | CGLO_TEMPERATURE);
 +                        }
 +                    }
 +                }
 +
 +                if (iterate.bIterationActive &&
 +                    done_iterating(cr, fplog, step, &iterate, bFirstIterate,
 +                                   state->veta, &vetanew))
 +                {
 +                    break;
 +                }
 +                bFirstIterate = FALSE;
 +            }
 +
 +            if (bTrotter && !bInitStep)
 +            {
 +                enerd->term[F_DVDL_BONDED] += dvdl;        /* only add after iterations */
 +                copy_mat(shake_vir, state->svir_prev);
 +                copy_mat(force_vir, state->fvir_prev);
 +                if (IR_NVT_TROTTER(ir) && ir->eI == eiVV)
 +                {
 +                    /* update temperature and kinetic energy now that step is over - this is the v(t+dt) point */
 +                    enerd->term[F_TEMP] = sum_ekin(&(ir->opts), ekind, NULL, (ir->eI == eiVV), FALSE, FALSE);
 +                    enerd->term[F_EKIN] = trace(ekind->ekin);
 +                }
 +            }
 +            /* if it's the initial step, we performed this first step just to get the constraint virial */
 +            if (bInitStep && ir->eI == eiVV)
 +            {
 +                copy_rvecn(cbuf, state->v, 0, state->natoms);
 +            }
 +
 +            if (fr->bSepDVDL && fplog && do_log)
 +            {
 +                fprintf(fplog, sepdvdlformat, "Constraint", 0.0, dvdl);
 +            }
 +            enerd->term[F_DVDL_BONDED] += dvdl;
 +        }
 +
 +        /* MRS -- now done iterating -- compute the conserved quantity */
 +        if (bVV)
 +        {
 +            saved_conserved_quantity = compute_conserved_from_auxiliary(ir, state, &MassQ);
 +            if (ir->eI == eiVV)
 +            {
 +                last_ekin = enerd->term[F_EKIN];
 +            }
 +            if ((ir->eDispCorr != edispcEnerPres) && (ir->eDispCorr != edispcAllEnerPres))
 +            {
 +                saved_conserved_quantity -= enerd->term[F_DISPCORR];
 +            }
 +            /* sum up the foreign energy and dhdl terms for vv.  currently done every step so that dhdl is correct in the .edr */
 +            if (!bRerunMD)
 +            {
 +                sum_dhdl(enerd, state->lambda, ir->fepvals);
 +            }
 +        }
 +
 +        /* ########  END FIRST UPDATE STEP  ############## */
 +        /* ########  If doing VV, we now have v(dt) ###### */
 +        if (bDoExpanded)
 +        {
 +            /* perform extended ensemble sampling in lambda - we don't
 +               actually move to the new state before outputting
 +               statistics, but if performing simulated tempering, we
 +               do update the velocities and the tau_t. */
 +
 +            lamnew = ExpandedEnsembleDynamics(fplog, ir, enerd, state, &MassQ, &df_history, step, mcrng, state->v, mdatoms);
 +        }
 +        /* ################## START TRAJECTORY OUTPUT ################# */
 +
 +        /* Now we have the energies and forces corresponding to the
 +         * coordinates at time t. We must output all of this before
 +         * the update.
 +         * for RerunMD t is read from input trajectory
 +         */
 +        mdof_flags = 0;
 +        if (do_per_step(step, ir->nstxout))
 +        {
 +            mdof_flags |= MDOF_X;
 +        }
 +        if (do_per_step(step, ir->nstvout))
 +        {
 +            mdof_flags |= MDOF_V;
 +        }
 +        if (do_per_step(step, ir->nstfout))
 +        {
 +            mdof_flags |= MDOF_F;
 +        }
 +        if (do_per_step(step, ir->nstxtcout))
 +        {
 +            mdof_flags |= MDOF_XTC;
 +        }
 +        if (bCPT)
 +        {
 +            mdof_flags |= MDOF_CPT;
 +        }
 +        ;
 +
 +#if defined(GMX_FAHCORE) || defined(GMX_WRITELASTSTEP)
 +        if (bLastStep)
 +        {
 +            /* Enforce writing positions and velocities at end of run */
 +            mdof_flags |= (MDOF_X | MDOF_V);
 +        }
 +#endif
 +#ifdef GMX_FAHCORE
 +        if (MASTER(cr))
 +        {
 +            fcReportProgress( ir->nsteps, step );
 +        }
 +
 +        /* sync bCPT and fc record-keeping */
 +        if (bCPT && MASTER(cr))
 +        {
 +            fcRequestCheckPoint();
 +        }
 +#endif
 +
 +        if (mdof_flags != 0)
 +        {
 +            wallcycle_start(wcycle, ewcTRAJ);
 +            if (bCPT)
 +            {
 +                if (state->flags & (1<<estLD_RNG))
 +                {
 +                    get_stochd_state(upd, state);
 +                }
 +                if (state->flags  & (1<<estMC_RNG))
 +                {
 +                    get_mc_state(mcrng, state);
 +                }
 +                if (MASTER(cr))
 +                {
 +                    if (bSumEkinhOld)
 +                    {
 +                        state_global->ekinstate.bUpToDate = FALSE;
 +                    }
 +                    else
 +                    {
 +                        update_ekinstate(&state_global->ekinstate, ekind);
 +                        state_global->ekinstate.bUpToDate = TRUE;
 +                    }
 +                    update_energyhistory(&state_global->enerhist, mdebin);
 +                    if (ir->efep != efepNO || ir->bSimTemp)
 +                    {
 +                        state_global->fep_state = state->fep_state; /* MRS: seems kludgy. The code should be
 +                                                                       structured so this isn't necessary.
 +                                                                       Note this reassignment is only necessary
 +                                                                       for single threads.*/
 +                        copy_df_history(&state_global->dfhist, &df_history);
 +                    }
 +                }
 +            }
 +            write_traj(fplog, cr, outf, mdof_flags, top_global,
 +                       step, t, state, state_global, f, f_global, &n_xtc, &x_xtc);
 +            if (bCPT)
 +            {
 +                nchkpt++;
 +                bCPT = FALSE;
 +            }
 +            debug_gmx();
 +            if (bLastStep && step_rel == ir->nsteps &&
 +                (Flags & MD_CONFOUT) && MASTER(cr) &&
 +                !bRerunMD && !bFFscan)
 +            {
 +                /* x and v have been collected in write_traj,
 +                 * because a checkpoint file will always be written
 +                 * at the last step.
 +                 */
 +                fprintf(stderr, "\nWriting final coordinates.\n");
 +                if (fr->bMolPBC)
 +                {
 +                    /* Make molecules whole only for confout writing */
 +                    do_pbc_mtop(fplog, ir->ePBC, state->box, top_global, state_global->x);
 +                }
 +                write_sto_conf_mtop(ftp2fn(efSTO, nfile, fnm),
 +                                    *top_global->name, top_global,
 +                                    state_global->x, state_global->v,
 +                                    ir->ePBC, state->box);
 +                debug_gmx();
 +            }
 +            wallcycle_stop(wcycle, ewcTRAJ);
 +        }
 +
 +        /* kludge -- virial is lost with restart for NPT control. Must restart */
 +        if (bStartingFromCpt && bVV)
 +        {
 +            copy_mat(state->svir_prev, shake_vir);
 +            copy_mat(state->fvir_prev, force_vir);
 +        }
 +        /*  ################## END TRAJECTORY OUTPUT ################ */
 +
 +        /* Determine the wallclock run time up till now */
 +        run_time = gmx_gettime() - (double)runtime->real;
 +
 +        /* Check whether everything is still all right */
 +        if (((int)gmx_get_stop_condition() > handled_stop_condition)
 +#ifdef GMX_THREAD_MPI
 +            && MASTER(cr)
 +#endif
 +            )
 +        {
 +            /* this just makes gs.sig compatible with the hack
 +               of sending signals around by MPI_Reduce together with
 +               other floats */
 +            if (gmx_get_stop_condition() == gmx_stop_cond_next_ns)
 +            {
 +                gs.sig[eglsSTOPCOND] = 1;
 +            }
 +            if (gmx_get_stop_condition() == gmx_stop_cond_next)
 +            {
 +                gs.sig[eglsSTOPCOND] = -1;
 +            }
 +            /* < 0 means stop at next step, > 0 means stop at next NS step */
 +            if (fplog)
 +            {
 +                fprintf(fplog,
 +                        "\n\nReceived the %s signal, stopping at the next %sstep\n\n",
 +                        gmx_get_signal_name(),
 +                        gs.sig[eglsSTOPCOND] == 1 ? "NS " : "");
 +                fflush(fplog);
 +            }
 +            fprintf(stderr,
 +                    "\n\nReceived the %s signal, stopping at the next %sstep\n\n",
 +                    gmx_get_signal_name(),
 +                    gs.sig[eglsSTOPCOND] == 1 ? "NS " : "");
 +            fflush(stderr);
 +            handled_stop_condition = (int)gmx_get_stop_condition();
 +        }
 +        else if (MASTER(cr) && (bNS || ir->nstlist <= 0) &&
 +                 (max_hours > 0 && run_time > max_hours*60.0*60.0*0.99) &&
 +                 gs.sig[eglsSTOPCOND] == 0 && gs.set[eglsSTOPCOND] == 0)
 +        {
 +            /* Signal to terminate the run */
 +            gs.sig[eglsSTOPCOND] = 1;
 +            if (fplog)
 +            {
 +                fprintf(fplog, "\nStep %s: Run time exceeded %.3f hours, will terminate the run\n", gmx_step_str(step, sbuf), max_hours*0.99);
 +            }
 +            fprintf(stderr, "\nStep %s: Run time exceeded %.3f hours, will terminate the run\n", gmx_step_str(step, sbuf), max_hours*0.99);
 +        }
 +
 +        if (bResetCountersHalfMaxH && MASTER(cr) &&
 +            run_time > max_hours*60.0*60.0*0.495)
 +        {
 +            gs.sig[eglsRESETCOUNTERS] = 1;
 +        }
 +
 +        if (ir->nstlist == -1 && !bRerunMD)
 +        {
 +            /* When bGStatEveryStep=FALSE, global_stat is only called
 +             * when we check the atom displacements, not at NS steps.
 +             * This means that also the bonded interaction count check is not
 +             * performed immediately after NS. Therefore a few MD steps could
 +             * be performed with missing interactions.
 +             * But wrong energies are never written to file,
 +             * since energies are only written after global_stat
 +             * has been called.
 +             */
 +            if (step >= nlh.step_nscheck)
 +            {
 +                nlh.nabnsb = natoms_beyond_ns_buffer(ir, fr, &top->cgs,
 +                                                     nlh.scale_tot, state->x);
 +            }
 +            else
 +            {
 +                /* This is not necessarily true,
 +                 * but step_nscheck is determined quite conservatively.
 +                 */
 +                nlh.nabnsb = 0;
 +            }
 +        }
 +
 +        /* In parallel we only have to check for checkpointing in steps
 +         * where we do global communication,
 +         *  otherwise the other nodes don't know.
 +         */
 +        if (MASTER(cr) && ((bGStat || !PAR(cr)) &&
 +                           cpt_period >= 0 &&
 +                           (cpt_period == 0 ||
 +                            run_time >= nchkpt*cpt_period*60.0)) &&
 +            gs.set[eglsCHKPT] == 0)
 +        {
 +            gs.sig[eglsCHKPT] = 1;
 +        }
 +
 +        /* at the start of step, randomize or scale the velocities (trotter done elsewhere) */
 +        if (EI_VV(ir->eI))
 +        {
 +            if (!bInitStep)
 +            {
 +                update_tcouple(fplog, step, ir, state, ekind, wcycle, upd, &MassQ, mdatoms);
 +            }
 +            if (ETC_ANDERSEN(ir->etc)) /* keep this outside of update_tcouple because of the extra info required to pass */
 +            {
 +                gmx_bool bIfRandomize;
 +                bIfRandomize = update_randomize_velocities(ir, step, mdatoms, state, upd, &top->idef, constr);
 +                /* if we have constraints, we have to remove the kinetic energy parallel to the bonds */
 +                if (constr && bIfRandomize)
 +                {
 +                    update_constraints(fplog, step, &dvdl, ir, ekind, mdatoms,
 +                                       state, fr->bMolPBC, graph, f,
 +                                       &top->idef, tmp_vir, NULL,
 +                                       cr, nrnb, wcycle, upd, constr,
 +                                       bInitStep, TRUE, bCalcVir, vetanew);
 +                }
 +            }
 +        }
 +
 +        if (bIterativeCase && do_per_step(step, ir->nstpcouple))
 +        {
 +            gmx_iterate_init(&iterate, TRUE);
 +            /* for iterations, we save these vectors, as we will be redoing the calculations */
 +            copy_coupling_state(state, bufstate, ekind, ekind_save, &(ir->opts));
 +        }
 +
 +        bFirstIterate = TRUE;
 +        while (bFirstIterate || iterate.bIterationActive)
 +        {
 +            /* We now restore these vectors to redo the calculation with improved extended variables */
 +            if (iterate.bIterationActive)
 +            {
 +                copy_coupling_state(bufstate, state, ekind_save, ekind, &(ir->opts));
 +            }
 +
 +            /* We make the decision to break or not -after- the calculation of Ekin and Pressure,
 +               so scroll down for that logic */
 +
 +            /* #########   START SECOND UPDATE STEP ################# */
 +            /* Box is changed in update() when we do pressure coupling,
 +             * but we should still use the old box for energy corrections and when
 +             * writing it to the energy file, so it matches the trajectory files for
 +             * the same timestep above. Make a copy in a separate array.
 +             */
 +            copy_mat(state->box, lastbox);
 +
 +            bOK = TRUE;
 +            if (!(bRerunMD && !rerun_fr.bV && !bForceUpdate))
 +            {
 +                wallcycle_start(wcycle, ewcUPDATE);
 +                dvdl = 0;
 +                /* UPDATE PRESSURE VARIABLES IN TROTTER FORMULATION WITH CONSTRAINTS */
 +                if (bTrotter)
 +                {
 +                    if (iterate.bIterationActive)
 +                    {
 +                        if (bFirstIterate)
 +                        {
 +                            scalevir = 1;
 +                        }
 +                        else
 +                        {
 +                            /* we use a new value of scalevir to converge the iterations faster */
 +                            scalevir = tracevir/trace(shake_vir);
 +                        }
 +                        msmul(shake_vir, scalevir, shake_vir);
 +                        m_add(force_vir, shake_vir, total_vir);
 +                        clear_mat(shake_vir);
 +                    }
 +                    trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ3);
 +                    /* We can only do Berendsen coupling after we have summed
 +                     * the kinetic energy or virial. Since this happens
 +                     * in global_stat after update, we should only do it at
 +                     * step % nstlist = 1 with bGStatEveryStep=FALSE.
 +                     */
 +                }
 +                else
 +                {
 +                    update_tcouple(fplog, step, ir, state, ekind, wcycle, upd, &MassQ, mdatoms);
 +                    update_pcouple(fplog, step, ir, state, pcoupl_mu, M, wcycle,
 +                                   upd, bInitStep);
 +                }
 +
 +                if (bVV)
 +                {
 +                    bUpdateDoLR = (fr->bTwinRange && do_per_step(step, ir->nstcalclr));
 +
 +                    /* velocity half-step update */
 +                    update_coords(fplog, step, ir, mdatoms, state, fr->bMolPBC, f,
 +                                  bUpdateDoLR, fr->f_twin, fcd,
 +                                  ekind, M, wcycle, upd, FALSE, etrtVELOCITY2,
 +                                  cr, nrnb, constr, &top->idef);
 +                }
 +
 +                /* Above, initialize just copies ekinh into ekin,
 +                 * it doesn't copy position (for VV),
 +                 * and entire integrator for MD.
 +                 */
 +
 +                if (ir->eI == eiVVAK)
 +                {
 +                    copy_rvecn(state->x, cbuf, 0, state->natoms);
 +                }
 +                bUpdateDoLR = (fr->bTwinRange && do_per_step(step, ir->nstcalclr));
 +
 +                update_coords(fplog, step, ir, mdatoms, state, fr->bMolPBC, f,
 +                              bUpdateDoLR, fr->f_twin, fcd,
 +                              ekind, M, wcycle, upd, bInitStep, etrtPOSITION, cr, nrnb, constr, &top->idef);
 +                wallcycle_stop(wcycle, ewcUPDATE);
 +
 +                update_constraints(fplog, step, &dvdl, ir, ekind, mdatoms, state,
 +                                   fr->bMolPBC, graph, f,
 +                                   &top->idef, shake_vir, force_vir,
 +                                   cr, nrnb, wcycle, upd, constr,
 +                                   bInitStep, FALSE, bCalcVir, state->veta);
 +
 +                if (ir->eI == eiVVAK)
 +                {
 +                    /* erase F_EKIN and F_TEMP here? */
 +                    /* just compute the kinetic energy at the half step to perform a trotter step */
 +                    compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm,
 +                                    wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
 +                                    constr, NULL, FALSE, lastbox,
 +                                    top_global, &pcurr, top_global->natoms, &bSumEkinhOld,
 +                                    cglo_flags | CGLO_TEMPERATURE
 +                                    );
 +                    wallcycle_start(wcycle, ewcUPDATE);
 +                    trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ4);
 +                    /* now we know the scaling, we can compute the positions again */
 +                    copy_rvecn(cbuf, state->x, 0, state->natoms);
 +
 +                    bUpdateDoLR = (fr->bTwinRange && do_per_step(step, ir->nstcalclr));
 +
 +                    update_coords(fplog, step, ir, mdatoms, state, fr->bMolPBC, f,
 +                                  bUpdateDoLR, fr->f_twin, fcd,
 +                                  ekind, M, wcycle, upd, bInitStep, etrtPOSITION, cr, nrnb, constr, &top->idef);
 +                    wallcycle_stop(wcycle, ewcUPDATE);
 +
 +                    /* do we need an extra constraint here? just need to copy out of state->v to upd->xp? */
 +                    /* are the small terms in the shake_vir here due
 +                     * to numerical errors, or are they important
 +                     * physically? I'm thinking they are just errors, but not completely sure.
 +                     * For now, will call without actually constraining, constr=NULL*/
 +                    update_constraints(fplog, step, &dvdl, ir, ekind, mdatoms,
 +                                       state, fr->bMolPBC, graph, f,
 +                                       &top->idef, tmp_vir, force_vir,
 +                                       cr, nrnb, wcycle, upd, NULL,
 +                                       bInitStep, FALSE, bCalcVir,
 +                                       state->veta);
 +                }
 +                if (!bOK && !bFFscan)
 +                {
 +                    gmx_fatal(FARGS, "Constraint error: Shake, Lincs or Settle could not solve the constrains");
 +                }
 +
 +                if (fr->bSepDVDL && fplog && do_log)
 +                {
 +                    fprintf(fplog, sepdvdlformat, "Constraint dV/dl", 0.0, dvdl);
 +                }
 +                enerd->term[F_DVDL_BONDED] += dvdl;
 +            }
 +            else if (graph)
 +            {
 +                /* Need to unshift here */
 +                unshift_self(graph, state->box, state->x);
 +            }
 +
 +            if (vsite != NULL)
 +            {
 +                wallcycle_start(wcycle, ewcVSITECONSTR);
 +                if (graph != NULL)
 +                {
 +                    shift_self(graph, state->box, state->x);
 +                }
 +                construct_vsites(fplog, vsite, state->x, nrnb, ir->delta_t, state->v,
 +                                 top->idef.iparams, top->idef.il,
 +                                 fr->ePBC, fr->bMolPBC, graph, cr, state->box);
 +
 +                if (graph != NULL)
 +                {
 +                    unshift_self(graph, state->box, state->x);
 +                }
 +                wallcycle_stop(wcycle, ewcVSITECONSTR);
 +            }
 +
 +            /* ############## IF NOT VV, Calculate globals HERE, also iterate constraints  ############ */
 +            /* With Leap-Frog we can skip compute_globals at
 +             * non-communication steps, but we need to calculate
 +             * the kinetic energy one step before communication.
 +             */
 +            if (bGStat || (!EI_VV(ir->eI) && do_per_step(step+1, nstglobalcomm)))
 +            {
 +                if (ir->nstlist == -1 && bFirstIterate)
 +                {
 +                    gs.sig[eglsNABNSB] = nlh.nabnsb;
 +                }
 +                compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm,
 +                                wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
 +                                constr,
 +                                bFirstIterate ? &gs : NULL,
 +                                (step_rel % gs.nstms == 0) &&
 +                                (multisim_nsteps < 0 || (step_rel < multisim_nsteps)),
 +                                lastbox,
 +                                top_global, &pcurr, top_global->natoms, &bSumEkinhOld,
 +                                cglo_flags
 +                                | (!EI_VV(ir->eI) || bRerunMD ? CGLO_ENERGY : 0)
 +                                | (!EI_VV(ir->eI) && bStopCM ? CGLO_STOPCM : 0)
 +                                | (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0)
 +                                | (!EI_VV(ir->eI) || bRerunMD ? CGLO_PRESSURE : 0)
 +                                | (iterate.bIterationActive ? CGLO_ITERATE : 0)
 +                                | (bFirstIterate ? CGLO_FIRSTITERATE : 0)
 +                                | CGLO_CONSTRAINT
 +                                );
 +                if (ir->nstlist == -1 && bFirstIterate)
 +                {
 +                    nlh.nabnsb         = gs.set[eglsNABNSB];
 +                    gs.set[eglsNABNSB] = 0;
 +                }
 +            }
 +            /* bIterate is set to keep it from eliminating the old ekin kinetic energy terms */
 +            /* #############  END CALC EKIN AND PRESSURE ################# */
 +
 +            /* Note: this is OK, but there are some numerical precision issues with using the convergence of
 +               the virial that should probably be addressed eventually. state->veta has better properties,
 +               but what we actually need entering the new cycle is the new shake_vir value. Ideally, we could
 +               generate the new shake_vir, but test the veta value for convergence.  This will take some thought. */
 +
 +            if (iterate.bIterationActive &&
 +                done_iterating(cr, fplog, step, &iterate, bFirstIterate,
 +                               trace(shake_vir), &tracevir))
 +            {
 +                break;
 +            }
 +            bFirstIterate = FALSE;
 +        }
 +
 +        /* only add constraint dvdl after constraints */
 +        enerd->term[F_DVDL_BONDED] += dvdl;
 +        if (!bVV || bRerunMD)
 +        {
 +            /* sum up the foreign energy and dhdl terms for md and sd. currently done every step so that dhdl is correct in the .edr */
 +            sum_dhdl(enerd, state->lambda, ir->fepvals);
 +        }
 +        update_box(fplog, step, ir, mdatoms, state, graph, f,
 +                   ir->nstlist == -1 ? &nlh.scale_tot : NULL, pcoupl_mu, nrnb, wcycle, upd, bInitStep, FALSE);
 +
 +        /* ################# END UPDATE STEP 2 ################# */
 +        /* #### We now have r(t+dt) and v(t+dt/2)  ############# */
 +
 +        /* The coordinates (x) were unshifted in update */
 +        if (bFFscan && (shellfc == NULL || bConverged))
 +        {
 +            if (print_forcefield(fplog, enerd->term, mdatoms->homenr,
 +                                 f, NULL, xcopy,
 +                                 &(top_global->mols), mdatoms->massT, pres))
 +            {
 +                gmx_finalize_par();
 +
 +                fprintf(stderr, "\n");
 +                exit(0);
 +            }
 +        }
 +        if (!bGStat)
 +        {
 +            /* We will not sum ekinh_old,
 +             * so signal that we still have to do it.
 +             */
 +            bSumEkinhOld = TRUE;
 +        }
 +
 +        if (bTCR)
 +        {
 +            /* Only do GCT when the relaxation of shells (minimization) has converged,
 +             * otherwise we might be coupling to bogus energies.
 +             * In parallel we must always do this, because the other sims might
 +             * update the FF.
 +             */
 +
 +            /* Since this is called with the new coordinates state->x, I assume
 +             * we want the new box state->box too. / EL 20040121
 +             */
 +            do_coupling(fplog, oenv, nfile, fnm, tcr, t, step, enerd->term, fr,
 +                        ir, MASTER(cr),
 +                        mdatoms, &(top->idef), mu_aver,
 +                        top_global->mols.nr, cr,
 +                        state->box, total_vir, pres,
 +                        mu_tot, state->x, f, bConverged);
 +            debug_gmx();
 +        }
 +
 +        /* #########  BEGIN PREPARING EDR OUTPUT  ###########  */
 +
 +        /* use the directly determined last velocity, not actually the averaged half steps */
 +        if (bTrotter && ir->eI == eiVV)
 +        {
 +            enerd->term[F_EKIN] = last_ekin;
 +        }
 +        enerd->term[F_ETOT] = enerd->term[F_EPOT] + enerd->term[F_EKIN];
 +
 +        if (bVV)
 +        {
 +            enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + saved_conserved_quantity;
 +        }
 +        else
 +        {
 +            enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + compute_conserved_from_auxiliary(ir, state, &MassQ);
 +        }
 +        /* Check for excessively large energies */
 +        if (bIonize)
 +        {
 +#ifdef GMX_DOUBLE
 +            real etot_max = 1e200;
 +#else
 +            real etot_max = 1e30;
 +#endif
 +            if (fabs(enerd->term[F_ETOT]) > etot_max)
 +            {
 +                fprintf(stderr, "Energy too large (%g), giving up\n",
 +                        enerd->term[F_ETOT]);
 +            }
 +        }
 +        /* #########  END PREPARING EDR OUTPUT  ###########  */
 +
 +        /* Time for performance */
 +        if (((step % stepout) == 0) || bLastStep)
 +        {
 +            runtime_upd_proc(runtime);
 +        }
 +
 +        /* Output stuff */
 +        if (MASTER(cr))
 +        {
 +            gmx_bool do_dr, do_or;
 +
 +            if (fplog && do_log && bDoExpanded)
 +            {
 +                /* only needed if doing expanded ensemble */
 +                PrintFreeEnergyInfoToFile(fplog, ir->fepvals, ir->expandedvals, ir->bSimTemp ? ir->simtempvals : NULL,
 +                                          &df_history, state->fep_state, ir->nstlog, step);
 +            }
 +            if (!(bStartingFromCpt && (EI_VV(ir->eI))))
 +            {
 +                if (bCalcEner)
 +                {
 +                    upd_mdebin(mdebin, bDoDHDL, TRUE,
 +                               t, mdatoms->tmass, enerd, state,
 +                               ir->fepvals, ir->expandedvals, lastbox,
 +                               shake_vir, force_vir, total_vir, pres,
 +                               ekind, mu_tot, constr);
 +                }
 +                else
 +                {
 +                    upd_mdebin_step(mdebin);
 +                }
 +
 +                do_dr  = do_per_step(step, ir->nstdisreout);
 +                do_or  = do_per_step(step, ir->nstorireout);
 +
 +                print_ebin(outf->fp_ene, do_ene, do_dr, do_or, do_log ? fplog : NULL,
 +                           step, t,
 +                           eprNORMAL, bCompact, mdebin, fcd, groups, &(ir->opts));
 +            }
 +            if (ir->ePull != epullNO)
 +            {
 +                pull_print_output(ir->pull, step, t);
 +            }
 +
 +            if (do_per_step(step, ir->nstlog))
 +            {
 +                if (fflush(fplog) != 0)
 +                {
 +                    gmx_fatal(FARGS, "Cannot flush logfile - maybe you are out of disk space?");
 +                }
 +            }
 +        }
 +        if (bDoExpanded)
 +        {
 +            /* Have to do this part after outputting the logfile and the edr file */
 +            state->fep_state = lamnew;
 +            for (i = 0; i < efptNR; i++)
 +            {
 +                state_global->lambda[i] = ir->fepvals->all_lambda[i][lamnew];
 +            }
 +        }
 +        /* Remaining runtime */
 +        if (MULTIMASTER(cr) && (do_verbose || gmx_got_usr_signal()) && !bPMETuneRunning)
 +        {
 +            if (shellfc)
 +            {
 +                fprintf(stderr, "\n");
 +            }
 +            print_time(stderr, runtime, step, ir, cr);
 +        }
 +
 +        /* Replica exchange */
 +        bExchanged = FALSE;
 +        if ((repl_ex_nst > 0) && (step > 0) && !bLastStep &&
 +            do_per_step(step, repl_ex_nst))
 +        {
 +            bExchanged = replica_exchange(fplog, cr, repl_ex,
 +                                          state_global, enerd,
 +                                          state, step, t);
 +
 +            if (bExchanged && DOMAINDECOMP(cr))
 +            {
 +                dd_partition_system(fplog, step, cr, TRUE, 1,
 +                                    state_global, top_global, ir,
 +                                    state, &f, mdatoms, top, fr,
 +                                    vsite, shellfc, constr,
 +                                    nrnb, wcycle, FALSE);
 +            }
 +        }
 +
 +        bFirstStep       = FALSE;
 +        bInitStep        = FALSE;
 +        bStartingFromCpt = FALSE;
 +
 +        /* #######  SET VARIABLES FOR NEXT ITERATION IF THEY STILL NEED IT ###### */
 +        /* With all integrators, except VV, we need to retain the pressure
 +         * at the current step for coupling at the next step.
 +         */
 +        if ((state->flags & (1<<estPRES_PREV)) &&
 +            (bGStatEveryStep ||
 +             (ir->nstpcouple > 0 && step % ir->nstpcouple == 0)))
 +        {
 +            /* Store the pressure in t_state for pressure coupling
 +             * at the next MD step.
 +             */
 +            copy_mat(pres, state->pres_prev);
 +        }
 +
 +        /* #######  END SET VARIABLES FOR NEXT ITERATION ###### */
 +
 +        if ( (membed != NULL) && (!bLastStep) )
 +        {
 +            rescale_membed(step_rel, membed, state_global->x);
 +        }
 +
 +        if (bRerunMD)
 +        {
 +            if (MASTER(cr))
 +            {
 +                /* read next frame from input trajectory */
 +                bNotLastFrame = read_next_frame(oenv, status, &rerun_fr);
 +            }
 +
 +            if (PAR(cr))
 +            {
 +                rerun_parallel_comm(cr, &rerun_fr, &bNotLastFrame);
 +            }
 +        }
 +
 +        if (!bRerunMD || !rerun_fr.bStep)
 +        {
 +            /* increase the MD step number */
 +            step++;
 +            step_rel++;
 +        }
 +
 +        cycles = wallcycle_stop(wcycle, ewcSTEP);
 +        if (DOMAINDECOMP(cr) && wcycle)
 +        {
 +            dd_cycles_add(cr->dd, cycles, ddCyclStep);
 +        }
 +
 +        if (bPMETuneRunning || bPMETuneTry)
 +        {
 +            /* PME grid + cut-off optimization with GPUs or PME nodes */
 +
 +            /* Count the total cycles over the last steps */
 +            cycles_pmes += cycles;
 +
 +            /* We can only switch cut-off at NS steps */
 +            if (step % ir->nstlist == 0)
 +            {
 +                /* PME grid + cut-off optimization with GPUs or PME nodes */
 +                if (bPMETuneTry)
 +                {
 +                    if (DDMASTER(cr->dd))
 +                    {
 +                        /* PME node load is too high, start tuning */
 +                        bPMETuneRunning = (dd_pme_f_ratio(cr->dd) >= 1.05);
 +                    }
 +                    dd_bcast(cr->dd, sizeof(gmx_bool), &bPMETuneRunning);
 +
 +                    if (bPMETuneRunning || step_rel > ir->nstlist*50)
 +                    {
 +                        bPMETuneTry     = FALSE;
 +                    }
 +                }
 +                if (bPMETuneRunning)
 +                {
 +                    /* init_step might not be a multiple of nstlist,
 +                     * but the first cycle is always skipped anyhow.
 +                     */
 +                    bPMETuneRunning =
 +                        pme_load_balance(pme_loadbal, cr,
 +                                         (bVerbose && MASTER(cr)) ? stderr : NULL,
 +                                         fplog,
 +                                         ir, state, cycles_pmes,
 +                                         fr->ic, fr->nbv, &fr->pmedata,
 +                                         step);
 +
 +                    /* Update constants in forcerec/inputrec to keep them in sync with fr->ic */
 +                    fr->ewaldcoeff = fr->ic->ewaldcoeff;
 +                    fr->rlist      = fr->ic->rlist;
 +                    fr->rlistlong  = fr->ic->rlistlong;
 +                    fr->rcoulomb   = fr->ic->rcoulomb;
 +                    fr->rvdw       = fr->ic->rvdw;
 +                }
 +                cycles_pmes = 0;
 +            }
 +        }
 +
 +        if (step_rel == wcycle_get_reset_counters(wcycle) ||
 +            gs.set[eglsRESETCOUNTERS] != 0)
 +        {
 +            /* Reset all the counters related to performance over the run */
 +            reset_all_counters(fplog, cr, step, &step_rel, ir, wcycle, nrnb, runtime,
 +                               fr->nbv != NULL && fr->nbv->bUseGPU ? fr->nbv->cu_nbv : NULL);
 +            wcycle_set_reset_counters(wcycle, -1);
 +            if (!(cr->duty & DUTY_PME))
 +            {
 +                /* Tell our PME node to reset its counters */
 +                gmx_pme_send_resetcounters(cr, step);
 +            }
 +            /* Correct max_hours for the elapsed time */
 +            max_hours                -= run_time/(60.0*60.0);
 +            bResetCountersHalfMaxH    = FALSE;
 +            gs.set[eglsRESETCOUNTERS] = 0;
 +        }
 +
 +    }
 +    /* End of main MD loop */
 +    debug_gmx();
 +
 +    /* Stop the time */
 +    runtime_end(runtime);
 +
 +    if (bRerunMD && MASTER(cr))
 +    {
 +        close_trj(status);
 +    }
 +
 +    if (!(cr->duty & DUTY_PME))
 +    {
 +        /* Tell the PME only node to finish */
 +        gmx_pme_send_finish(cr);
 +    }
 +
 +    if (MASTER(cr))
 +    {
 +        if (ir->nstcalcenergy > 0 && !bRerunMD)
 +        {
 +            print_ebin(outf->fp_ene, FALSE, FALSE, FALSE, fplog, step, t,
 +                       eprAVER, FALSE, mdebin, fcd, groups, &(ir->opts));
 +        }
 +    }
 +
 +    done_mdoutf(outf);
 +
 +    debug_gmx();
 +
 +    if (ir->nstlist == -1 && nlh.nns > 0 && fplog)
 +    {
 +        fprintf(fplog, "Average neighborlist lifetime: %.1f steps, std.dev.: %.1f steps\n", nlh.s1/nlh.nns, sqrt(nlh.s2/nlh.nns - sqr(nlh.s1/nlh.nns)));
 +        fprintf(fplog, "Average number of atoms that crossed the half buffer length: %.1f\n\n", nlh.ab/nlh.nns);
 +    }
 +
 +    if (pme_loadbal != NULL)
 +    {
++        pme_loadbal_done(pme_loadbal, cr, fplog,
++                         fr->nbv != NULL && fr->nbv->bUseGPU);
 +    }
 +
 +    if (shellfc && fplog)
 +    {
 +        fprintf(fplog, "Fraction of iterations that converged:           %.2f %%\n",
 +                (nconverged*100.0)/step_rel);
 +        fprintf(fplog, "Average number of force evaluations per MD step: %.2f\n\n",
 +                tcount/step_rel);
 +    }
 +
 +    if (repl_ex_nst > 0 && MASTER(cr))
 +    {
 +        print_replica_exchange_statistics(fplog, repl_ex);
 +    }
 +
 +    runtime->nsteps_done = step_rel;
 +
 +    return 0;
 +}
index 8a0cf21554c098405607cf961dd1ecf606f7f033,0000000000000000000000000000000000000000..1a2544f6b4ea642a68adb7cace9a2c09b3d38fe8
mode 100644,000000..100644
--- /dev/null
@@@ -1,763 -1,0 +1,795 @@@
-             setup->rcut_coulomb, setup->rlist,
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + *          GROningen MAchine for Chemical Simulations
 + *
 + *                        VERSION 4.6.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2011, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + *
 + * For more info, check our website at http://www.gromacs.org
 + *
 + * And Hey:
 + * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include "smalloc.h"
 +#include "network.h"
 +#include "calcgrid.h"
 +#include "pme.h"
 +#include "vec.h"
 +#include "domdec.h"
 +#include "nbnxn_cuda_data_mgmt.h"
 +#include "force.h"
 +#include "macros.h"
++#include "md_logging.h"
 +#include "pme_loadbal.h"
 +
 +/* Parameters and setting for one PP-PME setup */
 +typedef struct {
 +    real      rcut_coulomb;    /* Coulomb cut-off                              */
 +    real      rlist;           /* pair-list cut-off                            */
 +    real      rlistlong;       /* LR pair-list cut-off                         */
 +    int       nstcalclr;       /* frequency of evaluating long-range forces for group scheme */
 +    real      spacing;         /* (largest) PME grid spacing                   */
 +    ivec      grid;            /* the PME grid dimensions                      */
 +    real      grid_efficiency; /* inefficiency factor for non-uniform grids <= 1 */
 +    real      ewaldcoeff;      /* the Ewald coefficient                        */
 +    gmx_pme_t pmedata;         /* the data structure used in the PME code      */
 +
 +    int       count;           /* number of times this setup has been timed    */
 +    double    cycles;          /* the fastest time for this setup in cycles    */
 +} pme_setup_t;
 +
 +/* In the initial scan, step by grids that are at least a factor 0.8 coarser */
 +#define PME_LB_GRID_SCALE_FAC  0.8
 +/* In the initial scan, try to skip grids with uneven x/y/z spacing,
 + * checking if the "efficiency" is more than 5% worse than the previous grid.
 + */
 +#define PME_LB_GRID_EFFICIENCY_REL_FAC  1.05
 +/* Rerun up till 12% slower setups than the fastest up till now */
 +#define PME_LB_SLOW_FAC  1.12
 +/* If setups get more than 2% faster, do another round to avoid
 + * choosing a slower setup due to acceleration or fluctuations.
 + */
 +#define PME_LB_ACCEL_TOL 1.02
 +
 +enum {
 +    epmelblimNO, epmelblimBOX, epmelblimDD, epmelblimNR
 +};
 +
 +const char *pmelblim_str[epmelblimNR] =
 +{ "no", "box size", "domain decompostion" };
 +
 +struct pme_load_balancing {
 +    int          nstage;             /* the current maximum number of stages */
 +
 +    real         cut_spacing;        /* the minimum cutoff / PME grid spacing ratio */
 +    real         rcut_vdw;           /* Vdw cutoff (does not change) */
 +    real         rcut_coulomb_start; /* Initial electrostatics cutoff */
 +    int          nstcalclr_start;    /* Initial long-range force evaluation frequency (nstcalclr) */
 +    real         rbuf_coulomb;       /* the pairlist buffer size beyond the Coulomb cutoff */
 +    real         rbuf_vdw;           /* the pairlist buffer size beyond the Vdw cutoff */
 +    matrix       box_start;          /* the initial simulation box */
 +    int          n;                  /* the count of setup as well as the allocation size */
 +    pme_setup_t *setup;              /* the PME+cutoff setups */
 +    int          cur;                /* the current setup */
 +    int          fastest;            /* fastest setup up till now */
 +    int          start;              /* start of setup range to consider in stage>0 */
 +    int          end;                /* end   of setup range to consider in stage>0 */
 +    int          elimited;           /* was the balancing limited, uses enum above */
 +    int          cutoff_scheme;      /* Verlet or group cut-offs */
 +
 +    int          stage;              /* the current stage */
 +};
 +
 +void pme_loadbal_init(pme_load_balancing_t *pme_lb_p,
 +                      const t_inputrec *ir, matrix box,
 +                      const interaction_const_t *ic,
 +                      gmx_pme_t pmedata)
 +{
 +    pme_load_balancing_t pme_lb;
 +    real                 spm, sp;
 +    int                  d;
 +
 +    snew(pme_lb, 1);
 +
 +    /* Any number of stages >= 2 is supported */
 +    pme_lb->nstage   = 2;
 +
 +    pme_lb->cutoff_scheme = ir->cutoff_scheme;
 +
 +    if (pme_lb->cutoff_scheme == ecutsVERLET)
 +    {
 +        pme_lb->rbuf_coulomb = ic->rlist - ic->rcoulomb;
 +        pme_lb->rbuf_vdw     = pme_lb->rbuf_coulomb;
 +    }
 +    else
 +    {
 +        if (ic->rcoulomb > ic->rlist)
 +        {
 +            pme_lb->rbuf_coulomb = ic->rlistlong - ic->rcoulomb;
 +        }
 +        else
 +        {
 +            pme_lb->rbuf_coulomb = ic->rlist - ic->rcoulomb;
 +        }
 +        if (ic->rvdw > ic->rlist)
 +        {
 +            pme_lb->rbuf_vdw = ic->rlistlong - ic->rvdw;
 +        }
 +        else
 +        {
 +            pme_lb->rbuf_vdw = ic->rlist - ic->rvdw;
 +        }
 +    }
 +
 +    copy_mat(box, pme_lb->box_start);
 +    if (ir->ePBC == epbcXY && ir->nwall == 2)
 +    {
 +        svmul(ir->wall_ewald_zfac, pme_lb->box_start[ZZ], pme_lb->box_start[ZZ]);
 +    }
 +
 +    pme_lb->n = 1;
 +    snew(pme_lb->setup, pme_lb->n);
 +
 +    pme_lb->rcut_vdw              = ic->rvdw;
 +    pme_lb->rcut_coulomb_start    = ir->rcoulomb;
 +    pme_lb->nstcalclr_start       = ir->nstcalclr;
 +
 +    pme_lb->cur                   = 0;
 +    pme_lb->setup[0].rcut_coulomb = ic->rcoulomb;
 +    pme_lb->setup[0].rlist        = ic->rlist;
 +    pme_lb->setup[0].rlistlong    = ic->rlistlong;
 +    pme_lb->setup[0].nstcalclr    = ir->nstcalclr;
 +    pme_lb->setup[0].grid[XX]     = ir->nkx;
 +    pme_lb->setup[0].grid[YY]     = ir->nky;
 +    pme_lb->setup[0].grid[ZZ]     = ir->nkz;
 +    pme_lb->setup[0].ewaldcoeff   = ic->ewaldcoeff;
 +
 +    pme_lb->setup[0].pmedata  = pmedata;
 +
 +    spm = 0;
 +    for (d = 0; d < DIM; d++)
 +    {
 +        sp = norm(pme_lb->box_start[d])/pme_lb->setup[0].grid[d];
 +        if (sp > spm)
 +        {
 +            spm = sp;
 +        }
 +    }
 +    pme_lb->setup[0].spacing = spm;
 +
 +    if (ir->fourier_spacing > 0)
 +    {
 +        pme_lb->cut_spacing = ir->rcoulomb/ir->fourier_spacing;
 +    }
 +    else
 +    {
 +        pme_lb->cut_spacing = ir->rcoulomb/pme_lb->setup[0].spacing;
 +    }
 +
 +    pme_lb->stage = 0;
 +
 +    pme_lb->fastest  = 0;
 +    pme_lb->start    = 0;
 +    pme_lb->end      = 0;
 +    pme_lb->elimited = epmelblimNO;
 +
 +    *pme_lb_p = pme_lb;
 +}
 +
 +static gmx_bool pme_loadbal_increase_cutoff(pme_load_balancing_t pme_lb,
 +                                            int                  pme_order)
 +{
 +    pme_setup_t *set;
 +    real         fac, sp;
 +    real         tmpr_coulomb, tmpr_vdw;
 +    int          d;
 +
 +    /* Try to add a new setup with next larger cut-off to the list */
 +    pme_lb->n++;
 +    srenew(pme_lb->setup, pme_lb->n);
 +    set          = &pme_lb->setup[pme_lb->n-1];
 +    set->pmedata = NULL;
 +
 +    fac = 1;
 +    do
 +    {
 +        fac *= 1.01;
 +        clear_ivec(set->grid);
 +        sp = calc_grid(NULL, pme_lb->box_start,
 +                       fac*pme_lb->setup[pme_lb->cur].spacing,
 +                       &set->grid[XX],
 +                       &set->grid[YY],
 +                       &set->grid[ZZ]);
 +
 +        /* In parallel we can't have grids smaller than 2*pme_order,
 +         * and we would anyhow not gain much speed at these grid sizes.
 +         */
 +        for (d = 0; d < DIM; d++)
 +        {
 +            if (set->grid[d] <= 2*pme_order)
 +            {
 +                pme_lb->n--;
 +
 +                return FALSE;
 +            }
 +        }
 +    }
 +    while (sp <= 1.001*pme_lb->setup[pme_lb->cur].spacing);
 +
 +    set->rcut_coulomb = pme_lb->cut_spacing*sp;
 +
 +    if (pme_lb->cutoff_scheme == ecutsVERLET)
 +    {
 +        set->rlist        = set->rcut_coulomb + pme_lb->rbuf_coulomb;
 +        /* We don't use LR lists with Verlet, but this avoids if-statements in further checks */
 +        set->rlistlong    = set->rlist;
 +    }
 +    else
 +    {
 +        tmpr_coulomb          = set->rcut_coulomb + pme_lb->rbuf_coulomb;
 +        tmpr_vdw              = pme_lb->rcut_vdw + pme_lb->rbuf_vdw;
 +        set->rlist            = min(tmpr_coulomb, tmpr_vdw);
 +        set->rlistlong        = max(tmpr_coulomb, tmpr_vdw);
 +
 +        /* Set the long-range update frequency */
 +        if (set->rlist == set->rlistlong)
 +        {
 +            /* No long-range interactions if the short-/long-range cutoffs are identical */
 +            set->nstcalclr = 0;
 +        }
 +        else if (pme_lb->nstcalclr_start == 0 || pme_lb->nstcalclr_start == 1)
 +        {
 +            /* We were not doing long-range before, but now we are since rlist!=rlistlong */
 +            set->nstcalclr = 1;
 +        }
 +        else
 +        {
 +            /* We were already doing long-range interactions from the start */
 +            if (pme_lb->rcut_vdw > pme_lb->rcut_coulomb_start)
 +            {
 +                /* We were originally doing long-range VdW-only interactions.
 +                 * If rvdw is still longer than rcoulomb we keep the original nstcalclr,
 +                 * but if the coulomb cutoff has become longer we should update the long-range
 +                 * part every step.
 +                 */
 +                set->nstcalclr = (tmpr_vdw > tmpr_coulomb) ? pme_lb->nstcalclr_start : 1;
 +            }
 +            else
 +            {
 +                /* We were not doing any long-range interaction from the start,
 +                 * since it is not possible to do twin-range coulomb for the PME interaction.
 +                 */
 +                set->nstcalclr = 1;
 +            }
 +        }
 +    }
 +
 +    set->spacing      = sp;
 +    /* The grid efficiency is the size wrt a grid with uniform x/y/z spacing */
 +    set->grid_efficiency = 1;
 +    for (d = 0; d < DIM; d++)
 +    {
 +        set->grid_efficiency *= (set->grid[d]*sp)/norm(pme_lb->box_start[d]);
 +    }
 +    /* The Ewald coefficient is inversely proportional to the cut-off */
 +    set->ewaldcoeff =
 +        pme_lb->setup[0].ewaldcoeff*pme_lb->setup[0].rcut_coulomb/set->rcut_coulomb;
 +
 +    set->count   = 0;
 +    set->cycles  = 0;
 +
 +    if (debug)
 +    {
 +        fprintf(debug, "PME loadbal: grid %d %d %d, coulomb cutoff %f\n",
 +                set->grid[XX], set->grid[YY], set->grid[ZZ], set->rcut_coulomb);
 +    }
 +    return TRUE;
 +}
 +
 +static void print_grid(FILE *fp_err, FILE *fp_log,
 +                       const char *pre,
 +                       const char *desc,
 +                       const pme_setup_t *set,
 +                       double cycles)
 +{
 +    char buf[STRLEN], buft[STRLEN];
 +
 +    if (cycles >= 0)
 +    {
 +        sprintf(buft, ": %.1f M-cycles", cycles*1e-6);
 +    }
 +    else
 +    {
 +        buft[0] = '\0';
 +    }
 +    sprintf(buf, "%-11s%10s pme grid %d %d %d, coulomb cutoff %.3f%s",
 +            pre,
 +            desc, set->grid[XX], set->grid[YY], set->grid[ZZ], set->rcut_coulomb,
 +            buft);
 +    if (fp_err != NULL)
 +    {
 +        fprintf(fp_err, "\r%s\n", buf);
 +    }
 +    if (fp_log != NULL)
 +    {
 +        fprintf(fp_log, "%s\n", buf);
 +    }
 +}
 +
 +static int pme_loadbal_end(pme_load_balancing_t pme_lb)
 +{
 +    /* In the initial stage only n is set; end is not set yet */
 +    if (pme_lb->end > 0)
 +    {
 +        return pme_lb->end;
 +    }
 +    else
 +    {
 +        return pme_lb->n;
 +    }
 +}
 +
 +static void print_loadbal_limited(FILE *fp_err, FILE *fp_log,
 +                                  gmx_large_int_t step,
 +                                  pme_load_balancing_t pme_lb)
 +{
 +    char buf[STRLEN], sbuf[22];
 +
 +    sprintf(buf, "step %4s: the %s limited the PME load balancing to a coulomb cut-off of %.3f",
 +            gmx_step_str(step, sbuf),
 +            pmelblim_str[pme_lb->elimited],
 +            pme_lb->setup[pme_loadbal_end(pme_lb)-1].rcut_coulomb);
 +    if (fp_err != NULL)
 +    {
 +        fprintf(fp_err, "\r%s\n", buf);
 +    }
 +    if (fp_log != NULL)
 +    {
 +        fprintf(fp_log, "%s\n", buf);
 +    }
 +}
 +
 +static void switch_to_stage1(pme_load_balancing_t pme_lb)
 +{
 +    pme_lb->start = 0;
 +    while (pme_lb->start+1 < pme_lb->n &&
 +           (pme_lb->setup[pme_lb->start].count == 0 ||
 +            pme_lb->setup[pme_lb->start].cycles >
 +            pme_lb->setup[pme_lb->fastest].cycles*PME_LB_SLOW_FAC))
 +    {
 +        pme_lb->start++;
 +    }
 +    while (pme_lb->start > 0 && pme_lb->setup[pme_lb->start-1].cycles == 0)
 +    {
 +        pme_lb->start--;
 +    }
 +
 +    pme_lb->end = pme_lb->n;
 +    if (pme_lb->setup[pme_lb->end-1].count > 0 &&
 +        pme_lb->setup[pme_lb->end-1].cycles >
 +        pme_lb->setup[pme_lb->fastest].cycles*PME_LB_SLOW_FAC)
 +    {
 +        pme_lb->end--;
 +    }
 +
 +    pme_lb->stage = 1;
 +
 +    /* Next we want to choose setup pme_lb->start, but as we will increase
 +     * pme_lb->cur by one right after returning, we subtract 1 here.
 +     */
 +    pme_lb->cur = pme_lb->start - 1;
 +}
 +
 +gmx_bool pme_load_balance(pme_load_balancing_t pme_lb,
 +                          t_commrec           *cr,
 +                          FILE                *fp_err,
 +                          FILE                *fp_log,
 +                          t_inputrec          *ir,
 +                          t_state             *state,
 +                          double               cycles,
 +                          interaction_const_t *ic,
 +                          nonbonded_verlet_t  *nbv,
 +                          gmx_pme_t           *pmedata,
 +                          gmx_large_int_t      step)
 +{
 +    gmx_bool     OK;
 +    pme_setup_t *set;
 +    double       cycles_fast;
 +    char         buf[STRLEN], sbuf[22];
 +    real         rtab;
 +    gmx_bool     bUsesSimpleTables = TRUE;
 +
 +    if (pme_lb->stage == pme_lb->nstage)
 +    {
 +        return FALSE;
 +    }
 +
 +    if (PAR(cr))
 +    {
 +        gmx_sumd(1, &cycles, cr);
 +        cycles /= cr->nnodes;
 +    }
 +
 +    set = &pme_lb->setup[pme_lb->cur];
 +    set->count++;
 +
 +    rtab = ir->rlistlong + ir->tabext;
 +
 +    if (set->count % 2 == 1)
 +    {
 +        /* Skip the first cycle, because the first step after a switch
 +         * is much slower due to allocation and/or caching effects.
 +         */
 +        return TRUE;
 +    }
 +
 +    sprintf(buf, "step %4s: ", gmx_step_str(step, sbuf));
 +    print_grid(fp_err, fp_log, buf, "timed with", set, cycles);
 +
 +    if (set->count <= 2)
 +    {
 +        set->cycles = cycles;
 +    }
 +    else
 +    {
 +        if (cycles*PME_LB_ACCEL_TOL < set->cycles &&
 +            pme_lb->stage == pme_lb->nstage - 1)
 +        {
 +            /* The performance went up a lot (due to e.g. DD load balancing).
 +             * Add a stage, keep the minima, but rescan all setups.
 +             */
 +            pme_lb->nstage++;
 +
 +            if (debug)
 +            {
 +                fprintf(debug, "The performance for grid %d %d %d went from %.3f to %.1f M-cycles, this is more than %f\n"
 +                        "Increased the number stages to %d"
 +                        " and ignoring the previous performance\n",
 +                        set->grid[XX], set->grid[YY], set->grid[ZZ],
 +                        cycles*1e-6, set->cycles*1e-6, PME_LB_ACCEL_TOL,
 +                        pme_lb->nstage);
 +            }
 +        }
 +        set->cycles = min(set->cycles, cycles);
 +    }
 +
 +    if (set->cycles < pme_lb->setup[pme_lb->fastest].cycles)
 +    {
 +        pme_lb->fastest = pme_lb->cur;
 +
 +        if (DOMAINDECOMP(cr))
 +        {
 +            /* We found a new fastest setting, ensure that with subsequent
 +             * shorter cut-off's the dynamic load balancing does not make
 +             * the use of the current cut-off impossible. This solution is
 +             * a trade-off, as the PME load balancing and DD domain size
 +             * load balancing can interact in complex ways.
 +             * With the Verlet kernels, DD load imbalance will usually be
 +             * mainly due to bonded interaction imbalance, which will often
 +             * quickly push the domain boundaries beyond the limit for the
 +             * optimal, PME load balanced, cut-off. But it could be that
 +             * better overall performance can be obtained with a slightly
 +             * shorter cut-off and better DD load balancing.
 +             */
 +            change_dd_dlb_cutoff_limit(cr);
 +        }
 +    }
 +    cycles_fast = pme_lb->setup[pme_lb->fastest].cycles;
 +
 +    /* Check in stage 0 if we should stop scanning grids.
 +     * Stop when the time is more than SLOW_FAC longer than the fastest.
 +     */
 +    if (pme_lb->stage == 0 && pme_lb->cur > 0 &&
 +        cycles > pme_lb->setup[pme_lb->fastest].cycles*PME_LB_SLOW_FAC)
 +    {
 +        pme_lb->n = pme_lb->cur + 1;
 +        /* Done with scanning, go to stage 1 */
 +        switch_to_stage1(pme_lb);
 +    }
 +
 +    if (pme_lb->stage == 0)
 +    {
 +        int gridsize_start;
 +
 +        gridsize_start = set->grid[XX]*set->grid[YY]*set->grid[ZZ];
 +
 +        do
 +        {
 +            if (pme_lb->cur+1 < pme_lb->n)
 +            {
 +                /* We had already generated the next setup */
 +                OK = TRUE;
 +            }
 +            else
 +            {
 +                /* Find the next setup */
 +                OK = pme_loadbal_increase_cutoff(pme_lb, ir->pme_order);
 +            }
 +
 +            if (OK && ir->ePBC != epbcNONE)
 +            {
 +                OK = (sqr(pme_lb->setup[pme_lb->cur+1].rlistlong)
 +                      <= max_cutoff2(ir->ePBC, state->box));
 +                if (!OK)
 +                {
 +                    pme_lb->elimited = epmelblimBOX;
 +                }
 +            }
 +
 +            if (OK)
 +            {
 +                pme_lb->cur++;
 +
 +                if (DOMAINDECOMP(cr))
 +                {
 +                    OK = change_dd_cutoff(cr, state, ir,
 +                                          pme_lb->setup[pme_lb->cur].rlistlong);
 +                    if (!OK)
 +                    {
 +                        /* Failed: do not use this setup */
 +                        pme_lb->cur--;
 +                        pme_lb->elimited = epmelblimDD;
 +                    }
 +                }
 +            }
 +            if (!OK)
 +            {
 +                /* We hit the upper limit for the cut-off,
 +                 * the setup should not go further than cur.
 +                 */
 +                pme_lb->n = pme_lb->cur + 1;
 +                print_loadbal_limited(fp_err, fp_log, step, pme_lb);
 +                /* Switch to the next stage */
 +                switch_to_stage1(pme_lb);
 +            }
 +        }
 +        while (OK &&
 +               !(pme_lb->setup[pme_lb->cur].grid[XX]*
 +                 pme_lb->setup[pme_lb->cur].grid[YY]*
 +                 pme_lb->setup[pme_lb->cur].grid[ZZ] <
 +                 gridsize_start*PME_LB_GRID_SCALE_FAC
 +                 &&
 +                 pme_lb->setup[pme_lb->cur].grid_efficiency <
 +                 pme_lb->setup[pme_lb->cur-1].grid_efficiency*PME_LB_GRID_EFFICIENCY_REL_FAC));
 +    }
 +
 +    if (pme_lb->stage > 0 && pme_lb->end == 1)
 +    {
 +        pme_lb->cur   = 0;
 +        pme_lb->stage = pme_lb->nstage;
 +    }
 +    else if (pme_lb->stage > 0 && pme_lb->end > 1)
 +    {
 +        /* If stage = nstage-1:
 +         *   scan over all setups, rerunning only those setups
 +         *   which are not much slower than the fastest
 +         * else:
 +         *   use the next setup
 +         */
 +        do
 +        {
 +            pme_lb->cur++;
 +            if (pme_lb->cur == pme_lb->end)
 +            {
 +                pme_lb->stage++;
 +                pme_lb->cur = pme_lb->start;
 +            }
 +        }
 +        while (pme_lb->stage == pme_lb->nstage - 1 &&
 +               pme_lb->setup[pme_lb->cur].count > 0 &&
 +               pme_lb->setup[pme_lb->cur].cycles > cycles_fast*PME_LB_SLOW_FAC);
 +
 +        if (pme_lb->stage == pme_lb->nstage)
 +        {
 +            /* We are done optimizing, use the fastest setup we found */
 +            pme_lb->cur = pme_lb->fastest;
 +        }
 +    }
 +
 +    if (DOMAINDECOMP(cr) && pme_lb->stage > 0)
 +    {
 +        OK = change_dd_cutoff(cr, state, ir, pme_lb->setup[pme_lb->cur].rlistlong);
 +        if (!OK)
 +        {
 +            /* Failsafe solution */
 +            if (pme_lb->cur > 1 && pme_lb->stage == pme_lb->nstage)
 +            {
 +                pme_lb->stage--;
 +            }
 +            pme_lb->fastest  = 0;
 +            pme_lb->start    = 0;
 +            pme_lb->end      = pme_lb->cur;
 +            pme_lb->cur      = pme_lb->start;
 +            pme_lb->elimited = epmelblimDD;
 +            print_loadbal_limited(fp_err, fp_log, step, pme_lb);
 +        }
 +    }
 +
 +    /* Change the Coulomb cut-off and the PME grid */
 +
 +    set = &pme_lb->setup[pme_lb->cur];
 +
 +    ic->rcoulomb   = set->rcut_coulomb;
 +    ic->rlist      = set->rlist;
 +    ic->rlistlong  = set->rlistlong;
 +    ir->nstcalclr  = set->nstcalclr;
 +    ic->ewaldcoeff = set->ewaldcoeff;
 +
 +    bUsesSimpleTables = uses_simple_tables(ir->cutoff_scheme, nbv, 0);
 +    if (pme_lb->cutoff_scheme == ecutsVERLET &&
 +        nbv->grp[0].kernel_type == nbnxnk8x8x8_CUDA)
 +    {
 +        nbnxn_cuda_pme_loadbal_update_param(nbv->cu_nbv, ic);
 +    }
 +    else
 +    {
 +        init_interaction_const_tables(NULL, ic, bUsesSimpleTables,
 +                                      rtab);
 +    }
 +
 +    if (pme_lb->cutoff_scheme == ecutsVERLET && nbv->ngrp > 1)
 +    {
 +        init_interaction_const_tables(NULL, ic, bUsesSimpleTables,
 +                                      rtab);
 +    }
 +
 +    if (cr->duty & DUTY_PME)
 +    {
 +        if (pme_lb->setup[pme_lb->cur].pmedata == NULL)
 +        {
 +            /* Generate a new PME data structure,
 +             * copying part of the old pointers.
 +             */
 +            gmx_pme_reinit(&set->pmedata,
 +                           cr, pme_lb->setup[0].pmedata, ir,
 +                           set->grid);
 +        }
 +        *pmedata = set->pmedata;
 +    }
 +    else
 +    {
 +        /* Tell our PME-only node to switch grid */
 +        gmx_pme_send_switchgrid(cr, set->grid, set->ewaldcoeff);
 +    }
 +
 +    if (debug)
 +    {
 +        print_grid(NULL, debug, "", "switched to", set, -1);
 +    }
 +
 +    if (pme_lb->stage == pme_lb->nstage)
 +    {
 +        print_grid(fp_err, fp_log, "", "optimal", set, -1);
 +    }
 +
 +    return TRUE;
 +}
 +
 +void restart_pme_loadbal(pme_load_balancing_t pme_lb, int n)
 +{
 +    pme_lb->nstage += n;
 +}
 +
 +static int pme_grid_points(const pme_setup_t *setup)
 +{
 +    return setup->grid[XX]*setup->grid[YY]*setup->grid[ZZ];
 +}
 +
++static real pme_loadbal_rlist(const pme_setup_t *setup)
++{
++    /* With the group cut-off scheme we can have twin-range either
++     * for Coulomb or for VdW, so we need a check here.
++     * With the Verlet cut-off scheme rlist=rlistlong.
++     */
++    if (setup->rcut_coulomb > setup->rlist)
++    {
++        return setup->rlistlong;
++    }
++    else
++    {
++        return setup->rlist;
++    }
++}
++
 +static void print_pme_loadbal_setting(FILE              *fplog,
 +                                      char              *name,
 +                                      const pme_setup_t *setup)
 +{
 +    fprintf(fplog,
 +            "   %-7s %6.3f nm %6.3f nm     %3d %3d %3d   %5.3f nm  %5.3f nm\n",
 +            name,
-                                        FILE                *fplog)
++            setup->rcut_coulomb, pme_loadbal_rlist(setup),
 +            setup->grid[XX], setup->grid[YY], setup->grid[ZZ],
 +            setup->spacing, 1/setup->ewaldcoeff);
 +}
 +
 +static void print_pme_loadbal_settings(pme_load_balancing_t pme_lb,
-     pp_ratio   = pow(pme_lb->setup[pme_lb->cur].rlist/pme_lb->setup[0].rlistlong, 3.0);
++                                       t_commrec           *cr,
++                                       FILE                *fplog,
++                                       gmx_bool             bNonBondedOnGPU)
 +{
 +    double pp_ratio, grid_ratio;
 +
-     fprintf(fplog, "\n");
++    pp_ratio   = pow(pme_loadbal_rlist(&pme_lb->setup[pme_lb->cur])/pme_loadbal_rlist(&pme_lb->setup[0]), 3.0);
 +    grid_ratio = pme_grid_points(&pme_lb->setup[pme_lb->cur])/
 +        (double)pme_grid_points(&pme_lb->setup[0]);
 +
 +    fprintf(fplog, "\n");
 +    fprintf(fplog, "       P P   -   P M E   L O A D   B A L A N C I N G\n");
 +    fprintf(fplog, "\n");
 +    /* Here we only warn when the optimal setting is the last one */
 +    if (pme_lb->elimited != epmelblimNO &&
 +        pme_lb->cur == pme_loadbal_end(pme_lb)-1)
 +    {
 +        fprintf(fplog, " NOTE: The PP/PME load balancing was limited by the %s,\n",
 +                pmelblim_str[pme_lb->elimited]);
 +        fprintf(fplog, "       you might not have reached a good load balance.\n");
 +        if (pme_lb->elimited == epmelblimDD)
 +        {
 +            fprintf(fplog, "       Try different mdrun -dd settings or lower the -dds value.\n");
 +        }
 +        fprintf(fplog, "\n");
 +    }
 +    fprintf(fplog, " PP/PME load balancing changed the cut-off and PME settings:\n");
 +    fprintf(fplog, "           particle-particle                    PME\n");
 +    fprintf(fplog, "            rcoulomb  rlist            grid      spacing   1/beta\n");
 +    print_pme_loadbal_setting(fplog, "initial", &pme_lb->setup[0]);
 +    print_pme_loadbal_setting(fplog, "final", &pme_lb->setup[pme_lb->cur]);
 +    fprintf(fplog, " cost-ratio           %4.2f             %4.2f\n",
 +            pp_ratio, grid_ratio);
 +    fprintf(fplog, " (note that these numbers concern only part of the total PP and PME load)\n");
- void pme_loadbal_done(pme_load_balancing_t pme_lb, FILE *fplog)
++
++    if (pp_ratio > 1.5 && !bNonBondedOnGPU)
++    {
++        md_print_warn(cr, fplog,
++                      "NOTE: PME load balancing increased the non-bonded workload by more than 50%%.\n"
++                      "      For better performance use (more) PME nodes (mdrun -npme),\n"
++                      "      or in case you are beyond the scaling limit, use less nodes in total.\n");
++    }
++    else
++    {
++        fprintf(fplog, "\n");
++    }
 +}
 +
-         print_pme_loadbal_settings(pme_lb, fplog);
++void pme_loadbal_done(pme_load_balancing_t pme_lb,
++                      t_commrec *cr, FILE *fplog,
++                      gmx_bool bNonBondedOnGPU)
 +{
 +    if (fplog != NULL && (pme_lb->cur > 0 || pme_lb->elimited != epmelblimNO))
 +    {
++        print_pme_loadbal_settings(pme_lb, cr, fplog, bNonBondedOnGPU);
 +    }
 +
 +    /* TODO: Here we should free all pointers in pme_lb,
 +     * but as it contains pme data structures,
 +     * we need to first make pme.c free all data.
 +     */
 +}
index 472587c7dffe0c16cbbed32f15167ca08bf51d1e,0000000000000000000000000000000000000000..55acc9de1f300b9884d06739de52251418574fec
mode 100644,000000..100644
--- /dev/null
@@@ -1,78 -1,0 +1,80 @@@
- void pme_loadbal_done(pme_load_balancing_t pme_lb, FILE *fplog);
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + *          GROningen MAchine for Chemical Simulations
 + *
 + *                        VERSION 4.6.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2011, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + *
 + * For more info, check our website at http://www.gromacs.org
 + *
 + * And Hey:
 + * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
 + */
 +
 +#ifndef _pme_loadbal_h
 +#define _pme_loadbal_h
 +
 +typedef struct pme_load_balancing *pme_load_balancing_t;
 +
 +/* Initialize the PP-PME load balancing data and infrastructure */
 +void pme_loadbal_init(pme_load_balancing_t *pme_lb_p,
 +                      const t_inputrec *ir, matrix box,
 +                      const interaction_const_t *ic,
 +                      gmx_pme_t pmedata);
 +
 +/* Try to adjust the PME grid and Coulomb cut-off.
 + * The adjustment is done to generate a different non-bonded PP and PME load.
 + * With separate PME nodes (PP and PME on different processes) or with
 + * a GPU (PP on GPU, PME on CPU), PP and PME run on different resources
 + * and changing the load will affect the load balance and performance.
 + * The total time for a set of integration steps is monitored and a range
 + * of grid/cut-off setups is scanned. After calling pme_load_balance many
 + * times and acquiring enough statistics, the best performing setup is chosen.
 + * Here we try to take into account fluctuations and changes due to external
 + * factors as well as DD load balancing.
 + * Returns TRUE if the load balancing continues, FALSE if the balancing is done.
 + */
 +gmx_bool pme_load_balance(pme_load_balancing_t pme_lb,
 +                          t_commrec           *cr,
 +                          FILE                *fp_err,
 +                          FILE                *fp_log,
 +                          t_inputrec          *ir,
 +                          t_state             *state,
 +                          double               cycles,
 +                          interaction_const_t *ic,
 +                          nonbonded_verlet_t  *nbv,
 +                          gmx_pme_t           *pmedata,
 +                          gmx_large_int_t      step);
 +
 +/* Restart the PME load balancing discarding all timings gathered up till now */
 +void restart_pme_loadbal(pme_load_balancing_t pme_lb, int n);
 +
 +/* Finish the PME load balancing and print the settings when fplog!=NULL */
++void pme_loadbal_done(pme_load_balancing_t pme_lb,
++                      t_commrec *cr, FILE *fplog,
++                      gmx_bool bNonBondedOnGPU);
 +
 +#endif /* _pme_loadbal_h */
Simple merge
Simple merge
Simple merge