1
0
mirror of https://github.com/owncloud/ocis.git synced 2025-04-18 23:44:07 +03:00

Merge pull request #11163 from owncloud/search_prevent_file_indexing

feat: include special directories that won't be indexed
This commit is contained in:
Juan Pablo Villafañez 2025-04-01 15:32:38 +02:00 committed by GitHub
commit b29dc771f4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 118 additions and 27 deletions

View File

@ -4,6 +4,7 @@ import (
"context"
"fmt"
"path/filepath"
"slices"
"sort"
"strconv"
"strings"
@ -42,6 +43,15 @@ const (
_slowQueryDuration = 500 * time.Millisecond
)
var (
	// _skipPathNames holds the exact paths that are never indexed themselves.
	// When an entry refers to a directory, only that directory entry is left
	// out; whatever it contains is still processed.
	_skipPathNames = []string{"."}

	// _skipPathDirs holds the directories that are excluded from indexing
	// together with everything located below them.
	_skipPathDirs = []string{"./.space"}
)
// Searcher is the interface to the SearchService
type Searcher interface {
Search(ctx context.Context, req *searchsvc.SearchRequest) (*searchsvc.SearchResponse, error)
@ -436,8 +446,14 @@ func (s *Service) IndexSpace(spaceID *provider.StorageSpaceId) error {
return nil
}
relPath := utils.MakeRelativePath(filepath.Join(wd, info.Path))
if slices.Contains(_skipPathDirs, relPath) {
s.logger.Info().Str("path", relPath).Msg("skipping directory from being indexed")
return filepath.SkipDir
}
ref := &provider.Reference{
Path: utils.MakeRelativePath(filepath.Join(wd, info.Path)),
Path: relPath,
ResourceId: &rootID,
}
s.logger.Debug().Str("path", ref.Path).Msg("Walking tree")
@ -484,6 +500,18 @@ func (s *Service) UpsertItem(ref *provider.Reference) {
return
}
if slices.Contains(_skipPathNames, path) || slices.Contains(_skipPathDirs, path) {
s.logger.Info().Str("path", path).Msg("file won't be indexed")
return
}
for _, skipPath := range _skipPathDirs {
if strings.HasPrefix(path, skipPath+"/") {
s.logger.Info().Str("path", path).Msg("file is in a directory that won't be indexed")
return
}
}
doc, err := s.extractor.Extract(ctx, stat.Info)
if err != nil {
s.logger.Error().Err(err).Msg("failed to extract resource content")

View File

@ -16,6 +16,7 @@ import (
"github.com/owncloud/ocis/v2/services/search/pkg/config"
"github.com/owncloud/ocis/v2/services/search/pkg/content"
contentMocks "github.com/owncloud/ocis/v2/services/search/pkg/content/mocks"
"github.com/owncloud/ocis/v2/services/search/pkg/engine"
engineMocks "github.com/owncloud/ocis/v2/services/search/pkg/engine/mocks"
"github.com/owncloud/ocis/v2/services/search/pkg/search"
revactx "github.com/owncloud/reva/v2/pkg/ctx"
@ -63,13 +64,49 @@ var _ = Describe("Searchprovider", func() {
ri = &sprovider.ResourceInfo{
Id: &sprovider.ResourceId{
StorageId: "storageid",
SpaceId: "spaceid",
OpaqueId: "opaqueid",
},
ParentId: &sprovider.ResourceId{
StorageId: "storageid",
SpaceId: "spaceid",
OpaqueId: "parentopaqueid",
},
Path: "foo.pdf",
Path: "./foo.pdf",
Size: 12345,
Mtime: &typesv1beta1.Timestamp{Seconds: 4000},
}
// ri2 is the fixture for a container resource located at "./.space".
// The ".space directory" test case returns it from the mocked Stat call.
ri2 = &sprovider.ResourceInfo{
Id: &sprovider.ResourceId{
StorageId: "storageid",
SpaceId: "spaceid",
OpaqueId: "opaqueid",
},
ParentId: &sprovider.ResourceId{
StorageId: "storageid",
SpaceId: "spaceid",
OpaqueId: "parentopaqueid",
},
Type: sprovider.ResourceType_RESOURCE_TYPE_CONTAINER,
Path: "./.space",
Size: 12345,
Mtime: &typesv1beta1.Timestamp{Seconds: 4000},
}
// ri2_1 is the fixture for a file at "./.space/file.pdf"; its ParentId
// carries the same opaque id ("opaqueid") that ri2 uses as its own Id.
// The ".space directory" test case returns it from the mocked ListContainer call.
ri2_1 = &sprovider.ResourceInfo{
Id: &sprovider.ResourceId{
StorageId: "storageid",
SpaceId: "spaceid",
OpaqueId: "opaqueid_1",
},
ParentId: &sprovider.ResourceId{
StorageId: "storageid",
SpaceId: "spaceid",
OpaqueId: "opaqueid",
},
Type: sprovider.ResourceType_RESOURCE_TYPE_FILE,
Path: "./.space/file.pdf",
Size: 12345,
Mtime: &typesv1beta1.Timestamp{Seconds: 4000},
}
@ -122,15 +159,46 @@ var _ = Describe("Searchprovider", func() {
User: user,
}, nil)
extractor.On("Extract", mock.Anything, mock.Anything, mock.Anything).Return(content.Document{}, nil)
indexClient.On("Upsert", mock.Anything, mock.Anything).Return(nil)
indexClient.On("Upsert", mock.Anything, mock.MatchedBy(func(r engine.Resource) bool {
return r.ID == "storageid$spaceid!opaqueid" && r.Path == "./foo.pdf"
})).Return(nil)
indexClient.On("Search", mock.Anything, mock.Anything).Return(&searchsvc.SearchIndexResponse{}, nil)
gatewayClient.On("Stat", mock.Anything, mock.Anything).Return(&sprovider.StatResponse{
gatewayClient.On("Stat", mock.Anything, mock.MatchedBy(func(sreq *sprovider.StatRequest) bool {
return sreq.Ref.ResourceId.StorageId == "storageid" &&
sreq.Ref.ResourceId.OpaqueId == "spaceid" &&
sreq.Ref.ResourceId.SpaceId == "spaceid"
})).Return(&sprovider.StatResponse{
Status: status.NewOK(context.Background()),
Info: ri,
}, nil)
err := s.IndexSpace(&sprovider.StorageSpaceId{OpaqueId: "storageid$spaceid!spaceid"})
Expect(err).ShouldNot(HaveOccurred())
})
It("walks the space skipping .space directory", func() {
	// Mocked collaborators: user lookup, content extraction, directory
	// listing (yields the file inside .space) and Stat (yields .space).
	gatewayClient.On("GetUserByClaim", mock.Anything, mock.Anything).Return(&userv1beta1.GetUserByClaimResponse{
		Status: status.NewOK(context.Background()),
		User:   user,
	}, nil)
	extractor.On("Extract", mock.Anything, mock.Anything, mock.Anything).Return(content.Document{}, nil)
	gatewayClient.On("ListContainer", mock.Anything, mock.Anything).Return(&sprovider.ListContainerResponse{
		Status: status.NewOK(context.Background()),
		Infos:  []*sprovider.ResourceInfo{ri2_1},
	}, nil)
	gatewayClient.On("Stat", mock.Anything, mock.MatchedBy(func(sreq *sprovider.StatRequest) bool {
		return sreq.Ref.ResourceId.StorageId == "storageid" &&
			sreq.Ref.ResourceId.OpaqueId == "spaceid" &&
			sreq.Ref.ResourceId.SpaceId == "spaceid"
	})).Return(&sprovider.StatResponse{
		Status: status.NewOK(context.Background()),
		Info:   ri2,
	}, nil)

	err := s.IndexSpace(&sprovider.StorageSpaceId{OpaqueId: "storageid$spaceid!spaceid"})
	Expect(err).ShouldNot(HaveOccurred())

	// AssertNotCalled inspects the calls recorded so far, so it has to run
	// after IndexSpace; checking beforehand would pass trivially.
	indexClient.AssertNotCalled(GinkgoT(), "Upsert", mock.Anything, mock.Anything)
	indexClient.AssertNotCalled(GinkgoT(), "Search", mock.Anything, mock.Anything)
})
})
Describe("Search", func() {

View File

@ -262,22 +262,20 @@ The expected failures in this file are from features in the owncloud/ocis repo.
- [apiSearch1/search.feature:42](https://github.com/owncloud/ocis/blob/master/tests/acceptance/features/apiSearch1/search.feature#L42)
- [apiSearch1/search.feature:69](https://github.com/owncloud/ocis/blob/master/tests/acceptance/features/apiSearch1/search.feature#L69)
- [apiSearch1/search.feature:70](https://github.com/owncloud/ocis/blob/master/tests/acceptance/features/apiSearch1/search.feature#L70)
- [apiSearch1/search.feature:111](https://github.com/owncloud/ocis/blob/master/tests/acceptance/features/apiSearch1/search.feature#L111)
- [apiSearch1/search.feature:112](https://github.com/owncloud/ocis/blob/master/tests/acceptance/features/apiSearch1/search.feature#L112)
- [apiSearch1/search.feature:196](https://github.com/owncloud/ocis/blob/master/tests/acceptance/features/apiSearch1/search.feature#L196)
- [apiSearch1/search.feature:197](https://github.com/owncloud/ocis/blob/master/tests/acceptance/features/apiSearch1/search.feature#L197)
- [apiSearch1/search.feature:198](https://github.com/owncloud/ocis/blob/master/tests/acceptance/features/apiSearch1/search.feature#L198)
- [apiSearch1/search.feature:220](https://github.com/owncloud/ocis/blob/master/tests/acceptance/features/apiSearch1/search.feature#L220)
- [apiSearch1/search.feature:221](https://github.com/owncloud/ocis/blob/master/tests/acceptance/features/apiSearch1/search.feature#L221)
- [apiSearch1/search.feature:222](https://github.com/owncloud/ocis/blob/master/tests/acceptance/features/apiSearch1/search.feature#L222)
- [apiSearch1/search.feature:241](https://github.com/owncloud/ocis/blob/master/tests/acceptance/features/apiSearch1/search.feature#L241)
- [apiSearch1/search.feature:242](https://github.com/owncloud/ocis/blob/master/tests/acceptance/features/apiSearch1/search.feature#L242)
- [apiSearch1/search.feature:243](https://github.com/owncloud/ocis/blob/master/tests/acceptance/features/apiSearch1/search.feature#L243)
- [apiSearch1/search.feature:260](https://github.com/owncloud/ocis/blob/master/tests/acceptance/features/apiSearch1/search.feature#L260)
- [apiSearch1/search.feature:261](https://github.com/owncloud/ocis/blob/master/tests/acceptance/features/apiSearch1/search.feature#L261)
- [apiSearch1/search.feature:259](https://github.com/owncloud/ocis/blob/master/tests/acceptance/features/apiSearch1/search.feature#L259)
- [apiSearch1/search.feature:277](https://github.com/owncloud/ocis/blob/master/tests/acceptance/features/apiSearch1/search.feature#L277)
- [apiSearch1/search.feature:278](https://github.com/owncloud/ocis/blob/master/tests/acceptance/features/apiSearch1/search.feature#L278)
- [apiSearch1/search.feature:279](https://github.com/owncloud/ocis/blob/master/tests/acceptance/features/apiSearch1/search.feature#L279)
- [apiSearch1/search.feature:301](https://github.com/owncloud/ocis/blob/master/tests/acceptance/features/apiSearch1/search.feature#L301)
- [apiSearch1/search.feature:302](https://github.com/owncloud/ocis/blob/master/tests/acceptance/features/apiSearch1/search.feature#L302)
- [apiSearch1/search.feature:303](https://github.com/owncloud/ocis/blob/master/tests/acceptance/features/apiSearch1/search.feature#L303)
- [apiSearch1/search.feature:353](https://github.com/owncloud/ocis/blob/master/tests/acceptance/features/apiSearch1/search.feature#L353)
- [apiSearch1/search.feature:354](https://github.com/owncloud/ocis/blob/master/tests/acceptance/features/apiSearch1/search.feature#L354)
- [apiSearch1/search.feature:355](https://github.com/owncloud/ocis/blob/master/tests/acceptance/features/apiSearch1/search.feature#L355)
- [apiSearch2/tagSearch.feature:34](https://github.com/owncloud/ocis/blob/master/tests/acceptance/features/apiSearch2/tagSearch.feature#L34)
- [apiSearch2/tagSearch.feature:35](https://github.com/owncloud/ocis/blob/master/tests/acceptance/features/apiSearch2/tagSearch.feature#L35)
- [apiSearch2/tagSearch.feature:62](https://github.com/owncloud/ocis/blob/master/tests/acceptance/features/apiSearch2/tagSearch.feature#L62)

View File

@ -97,14 +97,14 @@ Feature: Search
| new |
| spaces |
@issue-10329
Scenario Outline: user can search hidden files
@issue-10329 @issue-11028
Scenario Outline: user can't search hidden files
Given using <dav-path-version> DAV path
And user "Alice" has created a folder ".space" in space "project101"
When user "Alice" searches for "*.sp*" using the WebDAV API
Then the HTTP status code should be "207"
And the search result should contain "1" entries
And the search result of user "Alice" should contain these entries:
And the search result should contain "0" entries
And the search result of user "Alice" should not contain these entries:
| /.space |
Examples:
| dav-path-version |
@ -172,13 +172,12 @@ Feature: Search
| new |
| spaces |
@issue-10329
Scenario: user can search project space by name
@issue-10329 @issue-11028
Scenario: user can't search project space by name
Given using spaces DAV path
When user "Alice" searches for '*project101*' using the WebDAV API
Then the HTTP status code should be "207"
And the search result should contain "1" entries
And for user "Alice" the search result should contain space "project101"
And the search result should contain "0" entries
@issue-10329
Scenario Outline: user can search inside folder in space

View File

@ -168,7 +168,7 @@ Feature: media type search
| *rar* | /data.rar |
| *bzip2* | /data.tar.bz2 |
@issue-10329
@issue-10329 @issue-11028
Scenario: search files with different mediatype filter
Given user "Alice" has created folder "testFolder"
And user "Alice" has uploaded file "filesForUpload/lorem.txt" to "lorem.txt"
@ -184,9 +184,8 @@ Feature: media type search
And user "Alice" has uploaded file "filesForUpload/data.tar.bz2" to "data.tar.bz2"
When user "Alice" searches for "mediatype:folder" using the WebDAV API
Then the HTTP status code should be "207"
And the search result should contain "2" entries
And the search result should contain "1" entries
And the search result of user "Alice" should contain these entries:
| %spaceid% |
| testFolder |
When user "Alice" searches for "mediatype:document" using the WebDAV API
Then the HTTP status code should be "207"

View File

@ -242,7 +242,7 @@ Feature: tag search
| new |
| spaces |
@issue-10329
@issue-10329 @issue-11163
Scenario Outline: search resources using different search patterns (KQL feature)
Given using spaces DAV path
And user "Alice" has created the following folders
@ -278,5 +278,4 @@ Feature: tag search
| (tag:mathe OR tag:klass10) NOT tag:physik | 2 | /answers | /verification work |
| tag:mathe NOT name:exercises | 1 | /answers | |
| tag:mathe AND NOT name:exercises | 1 | /answers | |
# The third finding is the personal space itself
| NOT tag:mathe | 3 | /verification work | /withoutTagFolder |
| NOT tag:mathe | 2 | /verification work | /withoutTagFolder |