Skip to content

Commit 5e1a429

Browse files
authored
Merge pull request #4316 from OpenMathLib/develop
Merge develop into release-0.3.0 for 0.3.25
2 parents 0e54cbd + 64c9671 commit 5e1a429

File tree

491 files changed

+14379
-39851
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

491 files changed

+14379
-39851
lines changed

.cirrus.yml

+10
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,16 @@ FreeBSD_task:
148148
- ls -l /usr/local/lib
149149
- gmake CC=gcc INTERFACE64=1
150150

151+
FreeBSD_task:
152+
name: FreeBSD-clang-openmp
153+
freebsd_instance:
154+
image_family: freebsd-13-2
155+
install_script:
156+
- pkg update -f && pkg upgrade -y && pkg install -y gmake gcc
157+
- ln -s /usr/local/lib/gcc12/libgfortran.so.5.0.0 /usr/lib/libgfortran.so
158+
compile_script:
159+
- gmake CC=clang FC=gfortran USE_OPENMP=1 CPP_THREAD_SAFETY_TEST=1
160+
151161
#task:
152162
# name: Windows/LLVM16 --- too slow ---
153163
# windows_container:

.cirun.yml

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Self-Hosted Github Action Runners on AWS via Cirun.io
2+
# Reference: https://docs.cirun.io/reference/yaml
3+
runners:
4+
- name: "aws-runner-graviton"
5+
# Cloud Provider: AWS
6+
cloud: "aws"
7+
region: "us-east-1"
8+
# AWS Graviton (arm64) instance type
9+
instance_type: "c7g.large"
10+
# Ubuntu 22.04 AMI
11+
machine_image: "ami-0a0c8eebcdd6dcbd0"
12+
preemptible: false
13+
# Add this label in the "runs-on" param in .github/workflows/<workflow-name>.yml
14+
# So that this runner is created for running the workflow
15+
labels:
16+
- "cirun-aws-runner-graviton"

.github/workflows/arm64_graviton.yml

+139
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
name: arm64 graviton cirun
2+
3+
on:
4+
push:
5+
branches:
6+
- develop
7+
- release-**
8+
pull_request:
9+
branches:
10+
- develop
11+
- release-**
12+
13+
concurrency:
14+
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
15+
cancel-in-progress: true
16+
17+
permissions:
18+
contents: read # to fetch code (actions/checkout)
19+
20+
jobs:
21+
build:
22+
if: "github.repository == 'OpenMathLib/OpenBLAS'"
23+
runs-on: "cirun-aws-runner-graviton--${{ github.run_id }}"
24+
25+
strategy:
26+
fail-fast: false
27+
matrix:
28+
fortran: [gfortran]
29+
build: [cmake, make]
30+
31+
steps:
32+
- name: Checkout repository
33+
uses: actions/checkout@v3
34+
35+
- name: Print system information
36+
run: |
37+
if [ "$RUNNER_OS" == "Linux" ]; then
38+
cat /proc/cpuinfo
39+
else
40+
echo "::error::$RUNNER_OS not supported"
41+
exit 1
42+
fi
43+
44+
- name: Install Dependencies
45+
run: |
46+
if [ "$RUNNER_OS" == "Linux" ]; then
47+
sudo apt update
48+
sudo apt-get install -y gfortran cmake ccache libtinfo5
49+
else
50+
echo "::error::$RUNNER_OS not supported"
51+
exit 1
52+
fi
53+
54+
- name: Compilation cache
55+
uses: actions/cache@v3
56+
with:
57+
path: ~/.ccache
58+
# We include the commit sha in the cache key, as new cache entries are
59+
# only created if there is no existing entry for the key yet.
60+
# GNU make and cmake call the compilers differently. It looks like
61+
# that causes the cache to mismatch. Keep the ccache for both build
62+
# tools separate to avoid polluting each other.
63+
key: ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }}-${{ github.sha }}
64+
# Restore a matching ccache entry, preferring the same branch and the same Fortran compiler.
65+
restore-keys: |
66+
ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }}
67+
ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}
68+
ccache-${{ runner.os }}-${{ matrix.build }}
69+
70+
- name: Configure ccache
71+
run: |
72+
if [ "${{ matrix.build }}" = "make" ]; then
73+
# Add ccache to path
74+
if [ "$RUNNER_OS" = "Linux" ]; then
75+
echo "/usr/lib/ccache" >> $GITHUB_PATH
76+
else
77+
echo "::error::$RUNNER_OS not supported"
78+
exit 1
79+
fi
80+
fi
81+
# Limit the maximum size and switch on compression to avoid exceeding the total disk or cache quota (5 GB).
82+
test -d ~/.ccache || mkdir -p ~/.ccache
83+
echo "max_size = 300M" > ~/.ccache/ccache.conf
84+
echo "compression = true" >> ~/.ccache/ccache.conf
85+
ccache -s
86+
87+
- name: Build OpenBLAS
88+
run: |
89+
case "${{ matrix.build }}" in
90+
"make")
91+
make -j$(nproc) DYNAMIC_ARCH=1 USE_OPENMP=0 FC="ccache ${{ matrix.fortran }}"
92+
;;
93+
"cmake")
94+
mkdir build && cd build
95+
cmake -DDYNAMIC_ARCH=1 \
96+
-DNOFORTRAN=0 \
97+
-DBUILD_WITHOUT_LAPACK=0 \
98+
-DCMAKE_VERBOSE_MAKEFILE=ON \
99+
-DCMAKE_BUILD_TYPE=Release \
100+
-DCMAKE_Fortran_COMPILER=${{ matrix.fortran }} \
101+
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
102+
-DCMAKE_Fortran_COMPILER_LAUNCHER=ccache \
103+
..
104+
cmake --build .
105+
;;
106+
*)
107+
echo "::error::Configuration not supported"
108+
exit 1
109+
;;
110+
esac
111+
112+
- name: Show ccache status
113+
continue-on-error: true
114+
run: ccache -s
115+
116+
- name: Run tests
117+
timeout-minutes: 60
118+
run: |
119+
case "${{ matrix.build }}" in
120+
"make")
121+
MAKE_FLAGS='DYNAMIC_ARCH=1 USE_OPENMP=0'
122+
echo "::group::Tests in 'test' directory"
123+
make -C test $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}"
124+
echo "::endgroup::"
125+
echo "::group::Tests in 'ctest' directory"
126+
make -C ctest $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}"
127+
echo "::endgroup::"
128+
echo "::group::Tests in 'utest' directory"
129+
make -C utest $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}"
130+
echo "::endgroup::"
131+
;;
132+
"cmake")
133+
cd build && ctest
134+
;;
135+
*)
136+
echo "::error::Configuration not supported"
137+
exit 1
138+
;;
139+
esac

.github/workflows/c910v.yml

+5
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,16 @@ name: c910v qemu test
22

33
on: [push, pull_request]
44

5+
concurrency:
6+
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
7+
cancel-in-progress: true
8+
59
permissions:
610
contents: read # to fetch code (actions/checkout)
711

812
jobs:
913
TEST:
14+
if: "github.repository == 'OpenMathLib/OpenBLAS'"
1015
runs-on: ubuntu-latest
1116
env:
1217
xuetie_toolchain: https://occ-oss-prod.oss-cn-hangzhou.aliyuncs.com/resource//1663142514282

.github/workflows/dynamic_arch.yml

+14-7
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,16 @@ name: continuous build
22

33
on: [push, pull_request]
44

5+
concurrency:
6+
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
7+
cancel-in-progress: true
8+
59
permissions:
610
contents: read # to fetch code (actions/checkout)
711

812
jobs:
913
build:
14+
if: "github.repository == 'OpenMathLib/OpenBLAS'"
1015
runs-on: ${{ matrix.os }}
1116

1217
strategy:
@@ -146,18 +151,19 @@ jobs:
146151
147152
148153
msys2:
154+
if: "github.repository == 'OpenMathLib/OpenBLAS'"
149155
runs-on: windows-latest
150156

151157
strategy:
152158
fail-fast: false
153159
matrix:
154-
msystem: [MINGW64, MINGW32, CLANG64, CLANG32]
160+
msystem: [UCRT64, MINGW32, CLANG64, CLANG32]
155161
idx: [int32, int64]
156162
build-type: [Release]
157163
include:
158-
- msystem: MINGW64
164+
- msystem: UCRT64
159165
idx: int32
160-
target-prefix: mingw-w64-x86_64
166+
target-prefix: mingw-w64-ucrt-x86_64
161167
fc-pkg: fc
162168
- msystem: MINGW32
163169
idx: int32
@@ -175,10 +181,10 @@ jobs:
175181
target-prefix: mingw-w64-clang-i686
176182
fc-pkg: cc
177183
c-lapack-flags: -DC_LAPACK=ON
178-
- msystem: MINGW64
184+
- msystem: UCRT64
179185
idx: int64
180186
idx64-flags: -DBINARY=64 -DINTERFACE64=1
181-
target-prefix: mingw-w64-x86_64
187+
target-prefix: mingw-w64-ucrt-x86_64
182188
fc-pkg: fc
183189
- msystem: CLANG64
184190
idx: int64
@@ -188,9 +194,9 @@ jobs:
188194
# Compiling with Flang 16 seems to cause test errors on machines
189195
# with AVX512 instructions. Revisit after MSYS2 distributes Flang 17.
190196
no-avx512-flags: -DNO_AVX512=1
191-
- msystem: MINGW64
197+
- msystem: UCRT64
192198
idx: int32
193-
target-prefix: mingw-w64-x86_64
199+
target-prefix: mingw-w64-ucrt-x86_64
194200
fc-pkg: fc
195201
build-type: None
196202
exclude:
@@ -312,6 +318,7 @@ jobs:
312318
313319
314320
cross_build:
321+
if: "github.repository == 'OpenMathLib/OpenBLAS'"
315322
runs-on: ubuntu-22.04
316323

317324
strategy:

.github/workflows/loongarch64.yml

+8
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,13 @@ name: loongarch64 qemu test
22

33
on: [push, pull_request]
44

5+
concurrency:
6+
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
7+
cancel-in-progress: true
8+
59
jobs:
610
TEST:
11+
if: "github.repository == 'OpenMathLib/OpenBLAS'"
712
runs-on: ubuntu-latest
813
strategy:
914
fail-fast: false
@@ -18,6 +23,9 @@ jobs:
1823
- target: LOONGSON2K1000
1924
triple: loongarch64-unknown-linux-gnu
2025
opts: NO_SHARED=1 TARGET=LOONGSON2K1000
26+
- target: DYNAMIC_ARCH
27+
triple: loongarch64-unknown-linux-gnu
28+
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=GENERIC
2129

2230
steps:
2331
- name: Checkout repository

.github/workflows/mips64.yml

+5
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,16 @@ name: mips64 qemu test
22

33
on: [push, pull_request]
44

5+
concurrency:
6+
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
7+
cancel-in-progress: true
8+
59
permissions:
610
contents: read # to fetch code (actions/checkout)
711

812
jobs:
913
TEST:
14+
if: "github.repository == 'OpenMathLib/OpenBLAS'"
1015
runs-on: ubuntu-latest
1116
strategy:
1217
fail-fast: false

.github/workflows/nightly-Homebrew-build.yml

+5
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,16 @@ on:
1818

1919
name: Nightly-Homebrew-Build
2020

21+
concurrency:
22+
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
23+
cancel-in-progress: true
24+
2125
permissions:
2226
contents: read # to fetch code (actions/checkout)
2327

2428
jobs:
2529
build-OpenBLAS-with-Homebrew:
30+
if: "github.repository == 'OpenMathLib/OpenBLAS'"
2631
runs-on: macos-latest
2732
env:
2833
DEVELOPER_DIR: /Applications/Xcode_11.4.1.app/Contents/Developer

Changelog.txt

+46
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,50 @@
11
OpenBLAS ChangeLog
2+
====================================================================
3+
Version 0.3.25
4+
12-Nov-2023
5+
6+
general:
7+
- improved the error message shown on exceeding the maximum thread count
8+
- improved the code to add supplementary thread buffers in case of overflow
9+
- fixed a potential division by zero in ?ROTG
10+
- improved the ?MATCOPY functions to accept zero-sized rows or columns
11+
- corrected empty prototypes in function declarations
12+
- cleaned up unused declarations in the f2c-converted versions of the LAPACK sources
13+
- fixed compilation with the Cray CCE Compiler suite
14+
- improved link line rewriting to avoid mixed libgomp/libomp builds with clang and gfortran
15+
- worked around OPENMP builds with LLVM14's libomp hanging on FreeBSD
16+
- improved the Makefiles to require less option duplication on "make install"
17+
- imported the following changes from the upcoming release 3.12 of Reference-LAPACK
18+
- deprecate utility functions ?GELQS and ?GEQRS (LAPACK PR 900)
19+
- apply rounding up to workspace calculations done in floating point (LAPACK PR 904)
20+
- avoid overflow in STGEX2/DTGEX2 (LAPACK PR 907)
21+
- fix accumulation in ?LASSQ (LAPACK PR 909)
22+
- fix handling of NaN values in ?GECON (LAPACK PR 926)
23+
- avoid overflow in CBDSQR/ZBDSQR (LAPACK PR 927)
24+
- fix poor vector orthogonalizations in ?ORBDB5/?UNBDB5 (LAPACK PR 928 & 930)
25+
26+
x86-64:
27+
- fixed compile-time autodetection of AMD Ryzen3 and Ryzen4 cpus
28+
- fixed capability-based fallback selection for unknown cpus in DYNAMIC_ARCH
29+
- added AVX512 optimizations for ?ASUM on Sapphire Rapids and Cooper Lake
30+
31+
ARM64:
32+
- fixed building on Apple with homebrew gcc
33+
- fixed building with Xcode 15
34+
- fixed building on A64FX and Cortex A710/X1/X2
35+
- increased the default buffer size for recent ARM server cpus
36+
37+
POWER:
38+
- fixed building with the IBM xlf 16.1.1 compiler
39+
- fixed building with IBM XL C
40+
- added support for DYNAMIC_ARCH builds with clang
41+
- fixed union declaration in the BFLOAT16 test case
42+
- enabled optimizations for the AIX assembler on POWER10
43+
44+
LOONGARCH64:
45+
- added an optimized SGEMV kernel
46+
- added an optimized DTRSM kernel
47+
248
====================================================================
349
Version 0.3.24
450
03-Sep-2023

0 commit comments

Comments
 (0)