Skip to content

Commit 0e985c9

Browse files
[PATCH] Return None when the submodule commit is not contained in the diff (#19)
* Return None when the submodule commit is not contained in the diff * handle commit diff limitations - log warning - add test - update README.md * fix README.md * fix README.md (2) Co-authored-by: ValentinFrancois
1 parent 6ef3bf8 commit 0e985c9

File tree

4 files changed

+97
-26
lines changed

4 files changed

+97
-26
lines changed

README.md

+44-16
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,9 @@ Gitlab project, and more importantly to get the commits they're pointing to.
1818

1919
Internally, it reads and parses the `.gitmodules` file at the root of the
2020
Project. To get the commit id of a submodule, it finds the last commit that
21-
updated the submodule and parses its diff.
21+
updated the submodule and parses its diff (this can sometimes fail due to a
22+
[limit of the GitLab API itself](https://docs.gitlab.com/ee/development/diffs.html#diff-collection-limits) -
23+
see [Limitations](#limitations)).
2224

2325
---
2426
**About the future of this package**
@@ -75,7 +77,7 @@ for subproject in subprojects:
7577
print('- {} ({}) -> {}'.format(
7678
subproject.submodule.path,
7779
subproject.project.web_url,
78-
subproject.commit.id))
80+
subproject.commit.id if subproject.commit else '?'))
7981
```
8082
Output:
8183
```
@@ -95,13 +97,16 @@ for subproject in subprojects:
9597
- print('- {} ({}) -> {}'.format(
9698
- subproject.submodule.path,
9799
- subproject.project.web_url,
98-
- subproject.commit.id))
100+
- subproject.commit.id if subproject.commit else '?'))
99101
+ head_subproject_commit = subproject.project.commits.list(
100102
+ ref=subproject.project.default_branch)[0]
101-
+ up_to_date = subproject.commit.id == head_subproject_commit.id
102-
+ print('- {}: {}'.format(
103-
+ subproject.submodule.path,
104-
+ 'ok' if up_to_date else '/!\\ must update'))
103+
+ if subproject.commit is None: # can happen with very large commit diffs
104+
+ status = 'cannot check'
105+
+ elif subproject.commit.id == head_subproject_commit.id:
106+
+ status = 'ok'
107+
+ else:
108+
+ status = '/!\\ must update'
109+
+ print('- {}: {}'.format(subproject.submodule.path, status))
105110

106111
```
107112
Output:
@@ -125,7 +130,7 @@ iterate_subprojects(
125130
self_managed_gitlab_host: Optional[str] = None
126131
) -> Generator[Subproject, None, None]
127132
```
128-
Parameters:
133+
#### Parameters:
129134
- `project`: a `gitlab.v4.objects.Project` object
130135
- `gitlab`: the `gitlab.Gitlab` instance that you used to authenticate, or its
131136
`projects: gitlab.v4.objects.ProjectManager` attribute
@@ -139,16 +144,31 @@ Parameters:
139144
self-managed GitLab instance, you should pass its url here otherwise it
140145
may be impossible to know from the URL that it's a GitLab project.
141146

142-
Returns: Generator of `Subproject` objects
147+
#### Returns:
148+
Generator of `Subproject` objects
149+
150+
#### Limitations:
151+
- due to https://docs.gitlab.com/ee/development/diffs.html#diff-collection-limits,
152+
some very large commit diffs won't be parsed entirely. This means that when
153+
inspecting the diff of the latest commit that updated `./<submodule_dir>`,
154+
in some rare cases `./<submodule_dir>` might not be part of the diff
155+
object returned by the GitLab API. In that case we have no other choice than
156+
set `Subproject.commit` to `None`, that's why the two examples above
157+
check if `subproject.commit` is not `None` before using the value
158+
`subproject.commit.id`.
159+
160+
---
143161

144162
### `list_subprojects(...)`
145163
Same parameters as [`iterate_subprojects(...)`](#iterate_subprojects) but
146164
returns a `list` of [`Subproject`](#class-subproject) objects.
147165

166+
---
167+
148168
### class `Subproject`
149169
Basic objects that contain the info about a Gitlab subproject.
150170

151-
Attributes:
171+
#### Attributes:
152172
- `project: Optional[gitlab.v4.objects.Project]`: the Gitlab project that the
153173
submodule links to (can be `None` if the submodule is not hosted on GitLab)
154174
- `submodule: `[`Submodule`](#class-submodule): a basic object that contains
@@ -157,7 +177,7 @@ Attributes:
157177
the submodule points to (if the submodule is not hosted on GitLab, it will
158178
be a dummy `Commit` object with a single attribute `id`)
159179

160-
Example `str()` output:
180+
#### Example `str()` output:
161181
```
162182
<class 'Subproject'> => {
163183
'submodule': <class 'Submodule'> => {'name': 'share/extensions', 'parent_project': <class 'gitlab.v4.objects.projects.Project'> => {'id': 3472737, 'description': 'Inkscape vector image editor', 'name': 'inkscape', 'name_with_namespace': 'Inkscape / inkscape', 'path': 'inkscape', 'path_with_namespace': 'inkscape/inkscape', 'created_at': '2017-06-09T14:16:35.615Z', 'default_branch': 'master', 'tag_list': [], 'topics': [], 'ssh_url_to_repo': 'git@gitlab.com:inkscape/inkscape.git', 'http_url_to_repo': 'https://gitlab.com/inkscape/inkscape.git', 'web_url': 'https://gitlab.com/inkscape/inkscape', 'readme_url': 'https://gitlab.com/inkscape/inkscape/-/blob/master/README.md', 'avatar_url': 'https://gitlab.com/uploads/-/system/project/avatar/3472737/inkscape.png', 'forks_count': 900, 'star_count': 2512, 'last_activity_at': '2022-01-29T23:45:49.894Z', 'namespace': {'id': 470642, 'name': 'Inkscape', 'path': 'inkscape', 'kind': 'group', 'full_path': 'inkscape', 'parent_id': None, 'avatar_url': '/uploads/-/system/group/avatar/470642/inkscape.png', 'web_url': 'https://gitlab.com/groups/inkscape'}}, 'parent_ref': 'e371b2f826adcba316f2e64bbf2f697043373d0b', 'path': 'share/extensions', 'url': 'https://gitlab.com/inkscape/extensions.git'},
@@ -166,45 +186,52 @@ Example `str()` output:
166186
}
167187
```
168188

189+
---
190+
169191
### `list_submodules(...)`
170192
Lists the info about the project submodules found in the `.gitmodules` file.
171193
```python
172194
list_project_submodules(
173195
project: Project,
174196
ref: Optional[str] = None) -> List[Submodule]
175197
```
176-
Parameters:
198+
#### Parameters:
177199
- `project`: a `gitlab.v4.objects.Project` object
178200
- `ref`: (optional) a ref to a branch, commit, tag etc. Defaults to the
179201
HEAD of the project default branch.
180202

181-
Returns: list of `Submodule` objects
203+
#### Returns:
204+
`list` of `Submodule` objects
205+
206+
---
182207

183208
### class `Submodule`
184209
Represents the `.gitmodules` config of a submodule + adds info about the
185210
parent project
186211

187-
Attributes:
212+
#### Attributes:
188213
- `parent_project: gitlab.v4.objects.Project`: project that uses the submodule
189214
- `parent_ref: str`: ref where the `.gitmodules` file was read
190215
- `name: str`: local name used by git for the submodule
191216
- `path: str`: local path pointing to the submodule directory in the project
192217
- `url: str`: URL linking to the location of the repo of the submodule (not
193218
necessarily Gitlab)
194219

195-
Example `str()` output:
220+
#### Example `str()` output:
196221
```
197222
<class 'Submodule'> => {'name': 'share/extensions', 'parent_project': <class 'gitlab.v4.objects.projects.Project'> => {'id': 3472737, 'description': 'Inkscape vector image editor', 'name': 'inkscape', 'name_with_namespace': 'Inkscape / inkscape', 'path': 'inkscape', 'path_with_namespace': 'inkscape/inkscape', 'created_at': '2017-06-09T14:16:35.615Z', 'default_branch': 'master', 'tag_list': [], 'topics': [], 'ssh_url_to_repo': 'git@gitlab.com:inkscape/inkscape.git', 'http_url_to_repo': 'https://gitlab.com/inkscape/inkscape.git', 'web_url': 'https://gitlab.com/inkscape/inkscape', 'readme_url': 'https://gitlab.com/inkscape/inkscape/-/blob/master/README.md', 'avatar_url': 'https://gitlab.com/uploads/-/system/project/avatar/3472737/inkscape.png', 'forks_count': 900, 'star_count': 2512, 'last_activity_at': '2022-01-29T23:45:49.894Z', 'namespace': {'id': 470642, 'name': 'Inkscape', 'path': 'inkscape', 'kind': 'group', 'full_path': 'inkscape', 'parent_id': None, 'avatar_url': '/uploads/-/system/group/avatar/470642/inkscape.png', 'web_url': 'https://gitlab.com/groups/inkscape'}}, 'parent_ref': 'e371b2f826adcba316f2e64bbf2f697043373d0b', 'path': 'share/extensions', 'url': 'https://gitlab.com/inkscape/extensions.git'}
198223
```
199224

225+
---
226+
200227
### `submodule_to_subproject(...)`
201228
Converts a `Submodule` object to a [`Subproject`](#class-subproject) object, assuming it's
202229
hosted on Gitlab.
203230

204231
Raises a `FileNotFoundError` if the path of the submodule actually doesn't
205232
exist in the host repo or if the url of the submodule doesn't link to an
206233
existing repo (both can happen if you modify the `.gitmodules` file without
207-
using one of the `git submodule` commands)
234+
using one of the `git submodule` commands).
208235

209236
```python
210237
submodule_to_subproject(
@@ -215,6 +242,7 @@ submodule_to_subproject(
215242
```
216243
Parameter details: See [`iterate_subprojects(...)`](#iterate_subprojects)
217244

245+
---
218246

219247
## Contributing
220248

gitlab_submodule/objects.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ class Subproject:
6161
def __init__(self,
6262
submodule: Submodule,
6363
project: Optional[Project],
64-
commit: Union[ProjectCommit, Commit]):
64+
commit: Optional[Union[ProjectCommit, Commit]]):
6565
self.submodule = submodule
6666
self.project = project
6767
self.commit = commit

gitlab_submodule/submodule_commit.py

+21-5
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,30 @@
11
from typing import Optional, Union
22

3+
import logging
34
import re
5+
from os import path
46

57
from gitlab.v4.objects import Project, ProjectCommit
68
from gitlab.exceptions import GitlabGetError
79

810
from gitlab_submodule.objects import Submodule, Commit
911

1012

13+
logger = logging.getLogger(__name__)
14+
15+
1116
def get_submodule_commit(
1217
submodule: Submodule,
1318
submodule_project: Optional[Project] = None,
14-
) -> Union[ProjectCommit, Commit]:
19+
) -> Optional[Union[ProjectCommit, Commit]]:
1520
commit_id = _get_submodule_commit_id(
1621
submodule.parent_project,
1722
submodule.path,
1823
submodule.parent_ref,
1924
)
25+
if commit_id is None:
26+
return None
27+
2028
if submodule_project is not None:
2129
commit = submodule_project.commits.get(commit_id)
2230
else:
@@ -28,7 +36,7 @@ def _get_submodule_commit_id(
2836
project: Project,
2937
submodule_path: str,
3038
ref: Optional[str] = None,
31-
) -> str:
39+
) -> Optional[str]:
3240
"""This uses a trick:
3341
- The .gitmodules files doesn't contain the actual commit sha that the
3442
submodules points to.
@@ -54,7 +62,9 @@ def _get_submodule_commit_id(
5462
update_submodule_commit = project.commits.get(last_commit_id)
5563

5664
submodule_commit_regex = r'Subproject commit ([a-zA-Z0-9]+)\n'
65+
n_files_in_diff = 0
5766
for diff_file in update_submodule_commit.diff(as_list=False):
67+
n_files_in_diff += 1
5868
if diff_file['new_path'] == submodule_path:
5969
# either the commit id was added for the first time,
6070
# or it was updated -> we can find one or two matches
@@ -67,6 +77,12 @@ def _get_submodule_commit_id(
6777
if len(matches) == 1:
6878
return matches[0]
6979

70-
# should never happen
71-
raise RuntimeError(f'Did not find any commit id for submodule '
72-
f'"{submodule_path}" at url "{project.web_url}"')
80+
logger.warning(
81+
f'Could not retrieve commit id for submodule at '
82+
f'{path.join(".", submodule_path)} for project '
83+
f'{project.path_with_namespace}, probably because the last commit '
84+
f'that updated the submodule has a too large diff '
85+
f'({n_files_in_diff} files). More info on Gitlab API diff limits: '
86+
f'https://docs.gitlab.com/ee/development/diffs.html'
87+
f'#diff-collection-limits')
88+
return None

tests/test_submodule_commit.py

+31-4
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1+
import logging
12
import unittest
3+
from unittest.mock import Mock
24

35
from gitlab import Gitlab
46
from gitlab.v4.objects import ProjectCommit
@@ -38,10 +40,10 @@ def test__get_submodule_commit_id_with_absolute_urls(self):
3840

3941
def test_get_submodule_commit_with_absolute_urls(self):
4042
gl = Gitlab()
41-
inkscape = gl.projects.get(
43+
project = gl.projects.get(
4244
'python-gitlab-submodule-test/test-projects/gitlab-absolute-urls')
4345
submodules = list_project_submodules(
44-
inkscape,
46+
project,
4547
ref='ce9b1e50b34372d82df098f3ffded58ef4be03ec')
4648
submodule_projects = [
4749
submodule_to_project(submodule, gl.projects)
@@ -63,9 +65,9 @@ def test_get_submodule_commit_with_absolute_urls(self):
6365

6466
def test_get_submodule_commit_with_relative_urls(self):
6567
gl = Gitlab()
66-
inkscape = gl.projects.get(
68+
project = gl.projects.get(
6769
'python-gitlab-submodule-test/test-projects/gitlab-relative-urls')
68-
submodules = list_project_submodules(inkscape, ref='main')
70+
submodules = list_project_submodules(project, ref='main')
6971
submodule_projects = [
7072
submodule_to_project(submodule, gl.projects)
7173
for submodule in submodules[:4]]
@@ -84,3 +86,28 @@ def test_get_submodule_commit_with_relative_urls(self):
8486
'906828a297594114e3b1c48d2191eb31a91284c9'}, # 4
8587
{commit.id for commit in submodule_commits}
8688
)
89+
90+
def test_get_submodule_commit_too_big_diff(self):
91+
92+
def mock_get_commit(*_, **__):
93+
mock_commit = Mock()
94+
mock_diff = [
95+
{'new_path': f'mock_file_{i}.txt'}
96+
for i in range(1, 1001)
97+
]
98+
mock_commit.diff = lambda *_, **__: mock_diff
99+
return mock_commit
100+
101+
gl = Gitlab()
102+
project = gl.projects.get(
103+
'python-gitlab-submodule-test/test-projects/gitlab-relative-urls')
104+
submodule = list_project_submodules(project, ref='main')[0]
105+
submodule_project = submodule_to_project(submodule, gl.projects)
106+
submodule.parent_project.commits.get = mock_get_commit
107+
with self.assertLogs(level=logging.WARNING) as log:
108+
submodule_commit = get_submodule_commit(submodule,
109+
submodule_project)
110+
self.assertEqual(1, len(log.output))
111+
self.assertIn('the submodule has a too large diff (1000 files).',
112+
log.output[0])
113+
self.assertIsNone(submodule_commit)

0 commit comments

Comments
 (0)